1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/BinaryFormat/Dwarf.h"
27 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCCodeView.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCDirectives.h"
32 #include "llvm/MC/MCDwarf.h"
33 #include "llvm/MC/MCExpr.h"
34 #include "llvm/MC/MCInstPrinter.h"
35 #include "llvm/MC/MCInstrDesc.h"
36 #include "llvm/MC/MCInstrInfo.h"
37 #include "llvm/MC/MCObjectFileInfo.h"
38 #include "llvm/MC/MCParser/AsmCond.h"
39 #include "llvm/MC/MCParser/AsmLexer.h"
40 #include "llvm/MC/MCParser/MCAsmLexer.h"
41 #include "llvm/MC/MCParser/MCAsmParser.h"
42 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
43 #include "llvm/MC/MCParser/MCAsmParserUtils.h"
44 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
45 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
46 #include "llvm/MC/MCRegisterInfo.h"
47 #include "llvm/MC/MCSection.h"
48 #include "llvm/MC/MCStreamer.h"
49 #include "llvm/MC/MCSymbol.h"
50 #include "llvm/MC/MCTargetOptions.h"
51 #include "llvm/MC/MCValue.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/Format.h"
56 #include "llvm/Support/MD5.h"
57 #include "llvm/Support/MathExtras.h"
58 #include "llvm/Support/MemoryBuffer.h"
59 #include "llvm/Support/SMLoc.h"
60 #include "llvm/Support/SourceMgr.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cctype>
65 #include <climits>
66 #include <cstddef>
67 #include <cstdint>
68 #include <deque>
69 #include <memory>
70 #include <sstream>
71 #include <string>
72 #include <tuple>
73 #include <utility>
74 #include <vector>
75 
76 using namespace llvm;
77 
78 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
79 
80 namespace {
81 
82 /// Helper types for tracking macro definitions.
83 typedef std::vector<AsmToken> MCAsmMacroArgument;
84 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
85 
/// Helper class for storing information about an active macro instantiation.
///
/// Records where the instantiation happened, where (buffer and location)
/// parsing resumes once it completes, and how deep the conditional stack was
/// when it started.
struct MacroInstantiation {
  /// The location of the instantiation.
  SMLoc InstantiationLoc;

  /// The buffer where parsing should resume upon instantiation completion.
  unsigned ExitBuffer;

  /// The location where parsing should resume upon instantiation completion.
  SMLoc ExitLoc;

  /// The depth of TheCondStack at the start of the instantiation.
  size_t CondStackDepth;
};
100 
101 struct ParseStatementInfo {
102   /// The parsed operands from the last parsed statement.
103   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
104 
105   /// The opcode from the last parsed instruction.
106   unsigned Opcode = ~0U;
107 
108   /// Was there an error parsing the inline assembly?
109   bool ParseError = false;
110 
111   /// The value associated with a macro exit.
112   Optional<std::string> ExitValue;
113 
114   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
115 
116   ParseStatementInfo() = delete;
ParseStatementInfo__anon0f5ce1770111::ParseStatementInfo117   ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
118       : AsmRewrites(rewrites) {}
119 };
120 
/// Kind of initializer stored for a struct field; used as the discriminator
/// of the FieldInitializer union.
enum FieldType {
  FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
  FT_REAL,     // Initializer: real number, stored as an APInt.
  FT_STRUCT    // Initializer: struct initializer, stored recursively.
};
126 
127 struct FieldInfo;
128 struct StructInfo {
129   StringRef Name;
130   bool IsUnion = false;
131   unsigned Alignment = 0;
132   unsigned Size = 0;
133   unsigned AlignmentSize = 0;
134   std::vector<FieldInfo> Fields;
135   StringMap<size_t> FieldsByName;
136 
137   FieldInfo &addField(StringRef FieldName, FieldType FT,
138                       unsigned FieldAlignmentSize);
139 
140   StructInfo() = default;
141 
StructInfo__anon0f5ce1770111::StructInfo142   StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue)
143       : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
144 };
145 
146 // FIXME: This should probably use a class hierarchy, raw pointers between the
147 // objects, and dynamic type resolution instead of a union. On the other hand,
148 // ownership then becomes much more complicated; the obvious thing would be to
149 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
150 
151 struct StructInitializer;
152 struct IntFieldInfo {
153   SmallVector<const MCExpr *, 1> Values;
154 
155   IntFieldInfo() = default;
IntFieldInfo__anon0f5ce1770111::IntFieldInfo156   IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon0f5ce1770111::IntFieldInfo157   IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
158 };
159 struct RealFieldInfo {
160   SmallVector<APInt, 1> AsIntValues;
161 
162   RealFieldInfo() = default;
RealFieldInfo__anon0f5ce1770111::RealFieldInfo163   RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon0f5ce1770111::RealFieldInfo164   RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
165 };
166 struct StructFieldInfo {
167   std::vector<StructInitializer> Initializers;
168   StructInfo Structure;
169 
170   StructFieldInfo() = default;
StructFieldInfo__anon0f5ce1770111::StructFieldInfo171   StructFieldInfo(const std::vector<StructInitializer> &V, StructInfo S) {
172     Initializers = V;
173     Structure = S;
174   }
StructFieldInfo__anon0f5ce1770111::StructFieldInfo175   StructFieldInfo(std::vector<StructInitializer> &&V, StructInfo S) {
176     Initializers = V;
177     Structure = S;
178   }
179 };
180 
181 class FieldInitializer {
182 public:
183   FieldType FT;
184   union {
185     IntFieldInfo IntInfo;
186     RealFieldInfo RealInfo;
187     StructFieldInfo StructInfo;
188   };
189 
~FieldInitializer()190   ~FieldInitializer() {
191     switch (FT) {
192     case FT_INTEGRAL:
193       IntInfo.~IntFieldInfo();
194       break;
195     case FT_REAL:
196       RealInfo.~RealFieldInfo();
197       break;
198     case FT_STRUCT:
199       StructInfo.~StructFieldInfo();
200       break;
201     }
202   }
203 
FieldInitializer(FieldType FT)204   FieldInitializer(FieldType FT) : FT(FT) {
205     switch (FT) {
206     case FT_INTEGRAL:
207       new (&IntInfo) IntFieldInfo();
208       break;
209     case FT_REAL:
210       new (&RealInfo) RealFieldInfo();
211       break;
212     case FT_STRUCT:
213       new (&StructInfo) StructFieldInfo();
214       break;
215     }
216   }
217 
FieldInitializer(SmallVector<const MCExpr *,1> && Values)218   FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) : FT(FT_INTEGRAL) {
219     new (&IntInfo) IntFieldInfo(Values);
220   }
221 
FieldInitializer(SmallVector<APInt,1> && AsIntValues)222   FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) : FT(FT_REAL) {
223     new (&RealInfo) RealFieldInfo(AsIntValues);
224   }
225 
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)226   FieldInitializer(std::vector<StructInitializer> &&Initializers,
227                    struct StructInfo Structure)
228       : FT(FT_STRUCT) {
229     new (&StructInfo) StructFieldInfo(Initializers, Structure);
230   }
231 
FieldInitializer(const FieldInitializer & Initializer)232   FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) {
233     switch (FT) {
234     case FT_INTEGRAL:
235       new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
236       break;
237     case FT_REAL:
238       new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
239       break;
240     case FT_STRUCT:
241       new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
242       break;
243     }
244   }
245 
FieldInitializer(FieldInitializer && Initializer)246   FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) {
247     switch (FT) {
248     case FT_INTEGRAL:
249       new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
250       break;
251     case FT_REAL:
252       new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
253       break;
254     case FT_STRUCT:
255       new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
256       break;
257     }
258   }
259 
operator =(const FieldInitializer & Initializer)260   FieldInitializer &operator=(const FieldInitializer &Initializer) {
261     if (FT != Initializer.FT) {
262       switch (FT) {
263       case FT_INTEGRAL:
264         IntInfo.~IntFieldInfo();
265         break;
266       case FT_REAL:
267         RealInfo.~RealFieldInfo();
268         break;
269       case FT_STRUCT:
270         StructInfo.~StructFieldInfo();
271         break;
272       }
273     }
274     FT = Initializer.FT;
275     switch (FT) {
276     case FT_INTEGRAL:
277       IntInfo = Initializer.IntInfo;
278       break;
279     case FT_REAL:
280       RealInfo = Initializer.RealInfo;
281       break;
282     case FT_STRUCT:
283       StructInfo = Initializer.StructInfo;
284       break;
285     }
286     return *this;
287   }
288 
operator =(FieldInitializer && Initializer)289   FieldInitializer &operator=(FieldInitializer &&Initializer) {
290     if (FT != Initializer.FT) {
291       switch (FT) {
292       case FT_INTEGRAL:
293         IntInfo.~IntFieldInfo();
294         break;
295       case FT_REAL:
296         RealInfo.~RealFieldInfo();
297         break;
298       case FT_STRUCT:
299         StructInfo.~StructFieldInfo();
300         break;
301       }
302     }
303     FT = Initializer.FT;
304     switch (FT) {
305     case FT_INTEGRAL:
306       IntInfo = Initializer.IntInfo;
307       break;
308     case FT_REAL:
309       RealInfo = Initializer.RealInfo;
310       break;
311     case FT_STRUCT:
312       StructInfo = Initializer.StructInfo;
313       break;
314     }
315     return *this;
316   }
317 };
318 
/// Initializer for a struct value: a list of field initializers.
// NOTE(review): presumably one entry per field of the struct being
// initialized, in field order - confirm against the users of this type.
struct StructInitializer {
  std::vector<FieldInitializer> FieldInitializers;
};
322 
/// Description of one field of a STRUCT/UNION: its placement, its size
/// information, and its initializer contents.
struct FieldInfo {
  // Offset of the field within the containing STRUCT.
  size_t Offset = 0;

  // Total size of the field (= LengthOf * Type).
  unsigned SizeOf = 0;

  // Number of elements in the field (1 if scalar, >1 if an array).
  unsigned LengthOf = 0;

  // Size of a single entry in this field, in bytes ("type" in MASM standards).
  unsigned Type = 0;

  // Initializer stored for this field; its kind is fixed by the FieldType
  // passed to the constructor.
  FieldInitializer Contents;

  // Creates a field whose Contents is a default initializer of kind FT; all
  // size/offset members start at zero.
  FieldInfo(FieldType FT) : Contents(FT) {}
};
340 
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)341 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
342                                 unsigned FieldAlignmentSize) {
343   if (!FieldName.empty())
344     FieldsByName[FieldName.lower()] = Fields.size();
345   Fields.emplace_back(FT);
346   FieldInfo &Field = Fields.back();
347   if (IsUnion) {
348     Field.Offset = 0;
349   } else {
350     Size = llvm::alignTo(Size, std::min(Alignment, FieldAlignmentSize));
351     Field.Offset = Size;
352   }
353   AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
354   return Field;
355 }
356 
357 /// The concrete assembly parser instance.
358 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
359 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
360 class MasmParser : public MCAsmParser {
361 private:
362   AsmLexer Lexer;
363   MCContext &Ctx;
364   MCStreamer &Out;
365   const MCAsmInfo &MAI;
366   SourceMgr &SrcMgr;
367   SourceMgr::DiagHandlerTy SavedDiagHandler;
368   void *SavedDiagContext;
369   std::unique_ptr<MCAsmParserExtension> PlatformParser;
370 
371   /// This is the current buffer index we're lexing from as managed by the
372   /// SourceMgr object.
373   unsigned CurBuffer;
374   std::vector<bool> EndStatementAtEOFStack;
375 
376   AsmCond TheCondState;
377   std::vector<AsmCond> TheCondStack;
378 
379   /// maps directive names to handler methods in parser
380   /// extensions. Extensions register themselves in this map by calling
381   /// addDirectiveHandler.
382   StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
383 
  /// An assembly-time variable. It carries both a numeric and a text value;
  /// IsText is the flag distinguishing text variables.
  // NOTE(review): presumably only one of NumericValue/TextValue is meaningful
  // depending on IsText - confirm against the code that defines variables.
  struct Variable {
    StringRef Name;
    bool Redefinable = true;
    bool IsText = false;
    int64_t NumericValue = 0;
    std::string TextValue;
  };
  /// Maps assembly-time variable names to variables.
  StringMap<Variable> Variables;
393 
394   /// Stack of active struct definitions.
395   SmallVector<StructInfo, 1> StructInProgress;
396 
397   /// Maps struct tags to struct definitions.
398   StringMap<StructInfo> Structs;
399 
400   /// Maps data location names to types.
401   StringMap<AsmTypeInfo> KnownType;
402 
403   /// Stack of active macro instantiations.
404   std::vector<MacroInstantiation*> ActiveMacros;
405 
406   /// List of bodies of anonymous macros.
407   std::deque<MCAsmMacro> MacroLikeBodies;
408 
409   /// Keeps track of how many .macro's have been instantiated.
410   unsigned NumOfMacroInstantiations;
411 
412   /// The values from the last parsed cpp hash file line comment if any.
413   struct CppHashInfoTy {
414     StringRef Filename;
415     int64_t LineNumber;
416     SMLoc Loc;
417     unsigned Buf;
CppHashInfoTy__anon0f5ce1770111::MasmParser::CppHashInfoTy418     CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
419   };
420   CppHashInfoTy CppHashInfo;
421 
422   /// The filename from the first cpp hash file line comment, if any.
423   StringRef FirstCppHashFilename;
424 
425   /// List of forward directional labels for diagnosis at the end.
426   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
427 
  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
429   /// Defaults to 1U, meaning Intel.
430   unsigned AssemblerDialect = 1U;
431 
432   /// is Darwin compatibility enabled?
433   bool IsDarwin = false;
434 
435   /// Are we parsing ms-style inline assembly?
436   bool ParsingMSInlineAsm = false;
437 
438   /// Did we already inform the user about inconsistent MD5 usage?
439   bool ReportedInconsistentMD5 = false;
440 
441   // Current <...> expression depth.
442   unsigned AngleBracketDepth = 0U;
443 
444   // Number of locals defined.
445   uint16_t LocalCounter = 0;
446 
447 public:
448   MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
449              const MCAsmInfo &MAI, unsigned CB);
450   MasmParser(const MasmParser &) = delete;
451   MasmParser &operator=(const MasmParser &) = delete;
452   ~MasmParser() override;
453 
454   bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
455 
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)456   void addDirectiveHandler(StringRef Directive,
457                            ExtensionDirectiveHandler Handler) override {
458     ExtensionDirectiveMap[Directive] = Handler;
459     if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
460       DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
461     }
462   }
463 
addAliasForDirective(StringRef Directive,StringRef Alias)464   void addAliasForDirective(StringRef Directive, StringRef Alias) override {
465     DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
466   }
467 
468   /// @name MCAsmParser Interface
469   /// {
470 
  /// Returns the source manager this parser was given (SrcMgr).
  SourceMgr &getSourceManager() override { return SrcMgr; }
  /// Returns the lexer held by this parser.
  MCAsmLexer &getLexer() override { return Lexer; }
  /// Returns the MC context this parser was given (Ctx).
  MCContext &getContext() override { return Ctx; }
  /// Returns the output streamer this parser was given (Out).
  MCStreamer &getStreamer() override { return Out; }

  /// Returns the CodeView context obtained from the MC context.
  CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
477 
getAssemblerDialect()478   unsigned getAssemblerDialect() override {
479     if (AssemblerDialect == ~0U)
480       return MAI.getAssemblerDialect();
481     else
482       return AssemblerDialect;
483   }
  /// Stores \p i as the assembler dialect. Storing ~0U makes
  /// getAssemblerDialect() fall back to the value provided by MAI.
  void setAssemblerDialect(unsigned i) override {
    AssemblerDialect = i;
  }
487 
488   void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
489   bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
490   bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
491 
492   const AsmToken &Lex() override;
493 
  /// Records whether MS-style inline assembly is being parsed and switches
  /// the lexer's MASM integer-literal mode to the same value.
  void setParsingMSInlineAsm(bool V) override {
    ParsingMSInlineAsm = V;
    // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
    // hex integer literals.
    Lexer.setLexMasmIntegers(V);
  }
  /// Returns the flag last stored by setParsingMSInlineAsm().
  bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }

  /// Always true for this parser.
  bool isParsingMasm() const override { return true; }
503 
504   bool defineMacro(StringRef Name, StringRef Value) override;
505 
506   bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
507   bool lookUpField(StringRef Base, StringRef Member,
508                    AsmFieldInfo &Info) const override;
509 
510   bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
511 
512   bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
513                         unsigned &NumOutputs, unsigned &NumInputs,
514                         SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
515                         SmallVectorImpl<std::string> &Constraints,
516                         SmallVectorImpl<std::string> &Clobbers,
517                         const MCInstrInfo *MII, const MCInstPrinter *IP,
518                         MCAsmParserSemaCallback &SI) override;
519 
520   bool parseExpression(const MCExpr *&Res);
521   bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
522   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
523                         AsmTypeInfo *TypeInfo) override;
524   bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
525   bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
526                              SMLoc &EndLoc) override;
527   bool parseAbsoluteExpression(int64_t &Res) override;
528 
529   /// Parse a floating point expression using the float \p Semantics
530   /// and set \p Res to the value.
531   bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
532 
533   /// Parse an identifier or string (as a quoted identifier)
534   /// and set \p Res to the identifier contents.
535   bool parseIdentifier(StringRef &Res) override;
536   void eatToEndOfStatement() override;
537 
538   bool checkForValidSection() override;
539 
540   /// }
541 
542 private:
543   bool parseStatement(ParseStatementInfo &Info,
544                       MCAsmParserSemaCallback *SI);
545   bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
546   bool parseCppHashLineFilenameComment(SMLoc L);
547 
548   bool expandMacro(raw_svector_ostream &OS, StringRef Body,
549                    ArrayRef<MCAsmMacroParameter> Parameters,
550                    ArrayRef<MCAsmMacroArgument> A,
551                    const std::vector<std::string> &Locals, SMLoc L);
552 
  /// Are we inside a macro instantiation? True whenever the stack of active
  /// macro instantiations (ActiveMacros) is non-empty.
  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
555 
556   /// Handle entry to macro instantiation.
557   ///
558   /// \param M The macro.
559   /// \param NameLoc Instantiation location.
560   bool handleMacroEntry(
561       const MCAsmMacro *M, SMLoc NameLoc,
562       AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
563 
564   /// Handle invocation of macro function.
565   ///
566   /// \param M The macro.
567   /// \param NameLoc Invocation location.
568   bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
569 
570   /// Handle exit from macro instantiation.
571   void handleMacroExit();
572 
573   /// Extract AsmTokens for a macro argument.
574   bool
575   parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
576                      AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
577 
578   /// Parse all macro arguments for a given macro.
579   bool
580   parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
581                       AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
582 
583   void printMacroInstantiations();
584 
585   bool expandStatement(SMLoc Loc);
586 
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=None) const587   void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
588                     SMRange Range = None) const {
589     ArrayRef<SMRange> Ranges(Range);
590     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
591   }
592   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
593 
594   bool lookUpField(const StructInfo &Structure, StringRef Member,
595                    AsmFieldInfo &Info) const;
596 
597   /// Should we emit DWARF describing this assembler source?  (Returns false if
598   /// the source has .file directives, which means we don't want to generate
599   /// info describing the assembler source itself.)
600   bool enabledGenDwarfForAssembly();
601 
602   /// Enter the specified file. This returns true on failure.
603   bool enterIncludeFile(const std::string &Filename);
604 
605   /// Reset the current lexer position to that given by \p Loc. The
606   /// current token is not set; clients should ensure Lex() is called
607   /// subsequently.
608   ///
609   /// \param InBuffer If not 0, should be the known buffer id that contains the
610   /// location.
611   void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
612                  bool EndStatementAtEOF = true);
613 
614   /// Parse up to a token of kind \p EndTok and return the contents from the
615   /// current token up to (but not including) this token; the current token on
616   /// exit will be either this kind or EOF. Reads through instantiated macro
617   /// functions and text macros.
618   SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
619   std::string parseStringTo(AsmToken::TokenKind EndTok);
620 
621   /// Parse up to the end of statement and return the contents from the current
622   /// token until the end of the statement; the current token on exit will be
623   /// either the EndOfStatement or EOF.
624   StringRef parseStringToEndOfStatement() override;
625 
626   bool parseTextItem(std::string &Data);
627 
628   unsigned getBinOpPrecedence(AsmToken::TokenKind K,
629                               MCBinaryExpr::Opcode &Kind);
630 
631   bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
632   bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
633   bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
634 
635   bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
636 
637   bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
638   bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
639 
  // Generic (target and platform independent) directive parsing.
  //
  // One enumerator per directive kind stored in DirectiveKindMap.
  // DK_NO_DIRECTIVE is the first enumerator (value 0), so it is the value a
  // default-created map entry holds. DK_HANDLER_DIRECTIVE is what
  // addDirectiveHandler records for directives that had no entry yet.
  enum DirectiveKind {
    DK_NO_DIRECTIVE, // Placeholder
    DK_HANDLER_DIRECTIVE,
    // Equates; DK_* values are passed to parseDirectiveEquate
    // ("=", "equ", "textequ").
    DK_ASSIGN,
    DK_EQU,
    DK_TEXTEQU,
    // String data (".ascii", ".asciz", ".string").
    DK_ASCII,
    DK_ASCIZ,
    DK_STRING,
    // Integral data ("byte", "word", ...).
    DK_BYTE,
    DK_SBYTE,
    DK_WORD,
    DK_SWORD,
    DK_DWORD,
    DK_SDWORD,
    DK_FWORD,
    DK_QWORD,
    DK_SQWORD,
    DK_DB,
    DK_DD,
    DK_DF,
    DK_DQ,
    DK_DW,
    // Real-number data ("real4", "real8", "real10").
    DK_REAL4,
    DK_REAL8,
    DK_REAL10,
    DK_ALIGN,
    DK_ORG,
    DK_ENDR,
    DK_EXTERN,
    DK_PUBLIC,
    DK_COMM,
    DK_COMMENT,
    DK_INCLUDE,
    DK_REPEAT,
    DK_WHILE,
    DK_FOR,
    DK_FORC,
    // DK_IF* / DK_ELSEIF* / DK_ELSE / DK_ENDIF family.
    DK_IF,
    DK_IFE,
    DK_IFB,
    DK_IFNB,
    DK_IFDEF,
    DK_IFNDEF,
    DK_IFDIF,
    DK_IFDIFI,
    DK_IFIDN,
    DK_IFIDNI,
    DK_ELSEIF,
    DK_ELSEIFE,
    DK_ELSEIFB,
    DK_ELSEIFNB,
    DK_ELSEIFDEF,
    DK_ELSEIFNDEF,
    DK_ELSEIFDIF,
    DK_ELSEIFDIFI,
    DK_ELSEIFIDN,
    DK_ELSEIFIDNI,
    DK_ELSE,
    DK_ENDIF,
    // ".file", ".line", ".loc", ".stabs"
    DK_FILE,
    DK_LINE,
    DK_LOC,
    DK_STABS,
    // ".cv_*" directives.
    DK_CV_FILE,
    DK_CV_FUNC_ID,
    DK_CV_INLINE_SITE_ID,
    DK_CV_LOC,
    DK_CV_LINETABLE,
    DK_CV_INLINE_LINETABLE,
    DK_CV_DEF_RANGE,
    DK_CV_STRINGTABLE,
    DK_CV_STRING,
    DK_CV_FILECHECKSUMS,
    DK_CV_FILECHECKSUM_OFFSET,
    DK_CV_FPO_DATA,
    // .cfi directives
    DK_CFI_SECTIONS,
    DK_CFI_STARTPROC,
    DK_CFI_ENDPROC,
    DK_CFI_DEF_CFA,
    DK_CFI_DEF_CFA_OFFSET,
    DK_CFI_ADJUST_CFA_OFFSET,
    DK_CFI_DEF_CFA_REGISTER,
    DK_CFI_OFFSET,
    DK_CFI_REL_OFFSET,
    DK_CFI_PERSONALITY,
    DK_CFI_LSDA,
    DK_CFI_REMEMBER_STATE,
    DK_CFI_RESTORE_STATE,
    DK_CFI_SAME_VALUE,
    DK_CFI_RESTORE,
    DK_CFI_ESCAPE,
    DK_CFI_RETURN_COLUMN,
    DK_CFI_SIGNAL_FRAME,
    DK_CFI_UNDEFINED,
    DK_CFI_REGISTER,
    DK_CFI_WINDOW_SAVE,
    DK_CFI_B_KEY_FRAME,
    // Macro definition and control.
    DK_MACRO,
    DK_EXITM,
    DK_ENDM,
    DK_PURGE,
    // DK_ERR* family.
    DK_ERR,
    DK_ERRB,
    DK_ERRNB,
    DK_ERRDEF,
    DK_ERRNDEF,
    DK_ERRDIF,
    DK_ERRDIFI,
    DK_ERRIDN,
    DK_ERRIDNI,
    DK_ERRE,
    DK_ERRNZ,
    DK_ECHO,
    // User-defined types (structs, unions).
    DK_STRUCT,
    DK_UNION,
    DK_ENDS,
    DK_END,
    DK_PUSHFRAME,
    DK_PUSHREG,
    DK_SAVEREG,
    DK_SAVEXMM128,
    DK_SETFRAME,
    DK_RADIX,
  };
766 
767   /// Maps directive name --> DirectiveKind enum, for directives parsed by this
768   /// class.
769   StringMap<DirectiveKind> DirectiveKindMap;
770 
771   bool isMacroLikeDirective();
772 
  // Codeview def_range type parsing.
  //
  // Values stored in CVDefRangeTypeMap. CVDR_DEFRANGE is explicitly 0, so it
  // is also the value of a default-created map entry.
  enum CVDefRangeType {
    CVDR_DEFRANGE = 0, // Placeholder
    CVDR_DEFRANGE_REGISTER,
    CVDR_DEFRANGE_FRAMEPOINTER_REL,
    CVDR_DEFRANGE_SUBFIELD_REGISTER,
    CVDR_DEFRANGE_REGISTER_REL
  };
781 
782   /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
783   /// def_range types parsed by this class.
784   StringMap<CVDefRangeType> CVDefRangeTypeMap;
785 
786   // ".ascii", ".asciz", ".string"
787   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
788 
789   // "byte", "word", ...
790   bool emitIntValue(const MCExpr *Value, unsigned Size);
791   bool parseScalarInitializer(unsigned Size,
792                               SmallVectorImpl<const MCExpr *> &Values,
793                               unsigned StringPadLength = 0);
794   bool parseScalarInstList(
795       unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
796       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
797   bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
798   bool addIntegralField(StringRef Name, unsigned Size);
799   bool parseDirectiveValue(StringRef IDVal, unsigned Size);
800   bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
801                                 StringRef Name, SMLoc NameLoc);
802 
803   // "real4", "real8", "real10"
804   bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
805   bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
806   bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
807                                size_t Size);
808   bool parseRealInstList(
809       const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
810       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
811   bool parseDirectiveNamedRealValue(StringRef TypeName,
812                                     const fltSemantics &Semantics,
813                                     unsigned Size, StringRef Name,
814                                     SMLoc NameLoc);
815 
816   bool parseOptionalAngleBracketOpen();
817   bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
818 
819   bool parseFieldInitializer(const FieldInfo &Field,
820                              FieldInitializer &Initializer);
821   bool parseFieldInitializer(const FieldInfo &Field,
822                              const IntFieldInfo &Contents,
823                              FieldInitializer &Initializer);
824   bool parseFieldInitializer(const FieldInfo &Field,
825                              const RealFieldInfo &Contents,
826                              FieldInitializer &Initializer);
827   bool parseFieldInitializer(const FieldInfo &Field,
828                              const StructFieldInfo &Contents,
829                              FieldInitializer &Initializer);
830 
831   bool parseStructInitializer(const StructInfo &Structure,
832                               StructInitializer &Initializer);
833   bool parseStructInstList(
834       const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
835       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
836 
837   bool emitFieldValue(const FieldInfo &Field);
838   bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
839   bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
840   bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
841 
842   bool emitFieldInitializer(const FieldInfo &Field,
843                             const FieldInitializer &Initializer);
844   bool emitFieldInitializer(const FieldInfo &Field,
845                             const IntFieldInfo &Contents,
846                             const IntFieldInfo &Initializer);
847   bool emitFieldInitializer(const FieldInfo &Field,
848                             const RealFieldInfo &Contents,
849                             const RealFieldInfo &Initializer);
850   bool emitFieldInitializer(const FieldInfo &Field,
851                             const StructFieldInfo &Contents,
852                             const StructFieldInfo &Initializer);
853 
854   bool emitStructInitializer(const StructInfo &Structure,
855                              const StructInitializer &Initializer);
856 
857   // User-defined types (structs, unions):
858   bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
859   bool addStructField(StringRef Name, const StructInfo &Structure);
860   bool parseDirectiveStructValue(const StructInfo &Structure,
861                                  StringRef Directive, SMLoc DirLoc);
862   bool parseDirectiveNamedStructValue(const StructInfo &Structure,
863                                       StringRef Directive, SMLoc DirLoc,
864                                       StringRef Name);
865 
866   // "=", "equ", "textequ"
867   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
868                             DirectiveKind DirKind);
869 
870   bool parseDirectiveOrg(); // ".org"
871   bool parseDirectiveAlign();  // "align"
872 
873   // ".file", ".line", ".loc", ".stabs"
874   bool parseDirectiveFile(SMLoc DirectiveLoc);
875   bool parseDirectiveLine();
876   bool parseDirectiveLoc();
877   bool parseDirectiveStabs();
878 
879   // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
880   // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
881   bool parseDirectiveCVFile();
882   bool parseDirectiveCVFuncId();
883   bool parseDirectiveCVInlineSiteId();
884   bool parseDirectiveCVLoc();
885   bool parseDirectiveCVLinetable();
886   bool parseDirectiveCVInlineLinetable();
887   bool parseDirectiveCVDefRange();
888   bool parseDirectiveCVString();
889   bool parseDirectiveCVStringTable();
890   bool parseDirectiveCVFileChecksums();
891   bool parseDirectiveCVFileChecksumOffset();
892   bool parseDirectiveCVFPOData();
893 
894   // .cfi directives
895   bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
896   bool parseDirectiveCFIWindowSave();
897   bool parseDirectiveCFISections();
898   bool parseDirectiveCFIStartProc();
899   bool parseDirectiveCFIEndProc();
900   bool parseDirectiveCFIDefCfaOffset();
901   bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
902   bool parseDirectiveCFIAdjustCfaOffset();
903   bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
904   bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
905   bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
906   bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
907   bool parseDirectiveCFIRememberState();
908   bool parseDirectiveCFIRestoreState();
909   bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
910   bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
911   bool parseDirectiveCFIEscape();
912   bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
913   bool parseDirectiveCFISignalFrame();
914   bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
915 
916   // macro directives
917   bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
918   bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
919                                std::string &Value);
920   bool parseDirectiveEndMacro(StringRef Directive);
921   bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
922 
923   bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
924                             StringRef Name, SMLoc NameLoc);
925   bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
926   bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
927   bool parseDirectiveNestedEnds();
928 
929   /// Parse a directive like ".globl" which accepts a single symbol (which
930   /// should be a label or an external).
931   bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
932 
933   bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
934 
935   bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
936 
937   bool parseDirectiveInclude(); // "include"
938 
939   // "if" or "ife"
940   bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
941   // "ifb" or "ifnb", depending on ExpectBlank.
942   bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
943   // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
944   // CaseInsensitive.
945   bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
946                            bool CaseInsensitive);
947   // "ifdef" or "ifndef", depending on expect_defined
948   bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
949   // "elseif" or "elseife"
950   bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
951   // "elseifb" or "elseifnb", depending on ExpectBlank.
952   bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
953   // ".elseifdef" or ".elseifndef", depending on expect_defined
954   bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
955   // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
956   // ExpectEqual and CaseInsensitive.
957   bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
958                                bool CaseInsensitive);
959   bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
960   bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"
961   bool parseEscapedString(std::string &Data) override;
962   bool parseAngleBracketString(std::string &Data) override;
963 
964   // Macro-like directives
965   MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
966   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
967                                 raw_svector_ostream &OS);
968   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
969                                 SMLoc ExitLoc, raw_svector_ostream &OS);
970   bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
971   bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
972   bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
973   bool parseDirectiveWhile(SMLoc DirectiveLoc);
974 
975   // "_emit" or "__emit"
976   bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
977                             size_t Len);
978 
979   // "align"
980   bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
981 
982   // "end"
983   bool parseDirectiveEnd(SMLoc DirectiveLoc);
984 
985   // ".err"
986   bool parseDirectiveError(SMLoc DirectiveLoc);
987   // ".errb" or ".errnb", depending on ExpectBlank.
988   bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".errdef" or ".errndef", depending on ExpectDefined.
990   bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
991   // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
992   // and CaseInsensitive.
993   bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
994                                 bool CaseInsensitive);
995   // ".erre" or ".errnz", depending on ExpectZero.
996   bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
997 
998   // ".radix"
999   bool parseDirectiveRadix(SMLoc DirectiveLoc);
1000 
1001   // "echo"
1002   bool parseDirectiveEcho();
1003 
1004   void initializeDirectiveKindMap();
1005   void initializeCVDefRangeTypeMap();
1006 };
1007 
1008 } // end anonymous namespace
1009 
1010 namespace llvm {
1011 
1012 extern MCAsmParserExtension *createCOFFMasmParser();
1013 
1014 } // end namespace llvm
1015 
// Named constant for address space 0.
// NOTE(review): no use of this constant is visible in this part of the file.
enum { DEFAULT_ADDRSPACE = 0 };
1017 
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,unsigned CB=0)1018 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1019                        const MCAsmInfo &MAI, unsigned CB = 0)
1020     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1021       CurBuffer(CB ? CB : SM.getMainFileID()) {
1022   HadError = false;
1023   // Save the old handler.
1024   SavedDiagHandler = SrcMgr.getDiagHandler();
1025   SavedDiagContext = SrcMgr.getDiagContext();
1026   // Set our own handler which calls the saved handler.
1027   SrcMgr.setDiagHandler(DiagHandler, this);
1028   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1029   EndStatementAtEOFStack.push_back(true);
1030 
1031   // Initialize the platform / file format parser.
1032   switch (Ctx.getObjectFileType()) {
1033   case MCContext::IsCOFF:
1034     PlatformParser.reset(createCOFFMasmParser());
1035     break;
1036   default:
1037     report_fatal_error("llvm-ml currently supports only COFF output.");
1038     break;
1039   }
1040 
1041   initializeDirectiveKindMap();
1042   PlatformParser->Initialize(*this);
1043   initializeCVDefRangeTypeMap();
1044 
1045   NumOfMacroInstantiations = 0;
1046 }
1047 
~MasmParser()1048 MasmParser::~MasmParser() {
1049   assert((HadError || ActiveMacros.empty()) &&
1050          "Unexpected active macro instantiation!");
1051 
1052   // Restore the saved diagnostics handler and context for use during
1053   // finalization.
1054   SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1055 }
1056 
printMacroInstantiations()1057 void MasmParser::printMacroInstantiations() {
1058   // Print the active macro instantiation stack.
1059   for (std::vector<MacroInstantiation *>::const_reverse_iterator
1060            it = ActiveMacros.rbegin(),
1061            ie = ActiveMacros.rend();
1062        it != ie; ++it)
1063     printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1064                  "while in macro instantiation");
1065 }
1066 
/// Emit a note diagnostic with text \p Msg at location \p L.
///
/// Pending errors are printed first, then the note itself, then one note per
/// active macro instantiation.
void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
  printPendingErrors();
  printMessage(L, SourceMgr::DK_Note, Msg, Range);
  printMacroInstantiations();
}
1072 
Warning(SMLoc L,const Twine & Msg,SMRange Range)1073 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1074   if (getTargetParser().getTargetOptions().MCNoWarn)
1075     return false;
1076   if (getTargetParser().getTargetOptions().MCFatalWarnings)
1077     return Error(L, Msg, Range);
1078   printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1079   printMacroInstantiations();
1080   return false;
1081 }
1082 
/// Print an error diagnostic with text \p Msg at location \p L immediately
/// and record that an error occurred.
///
/// The active macro instantiation notes are printed after the error.
/// \returns always true.
bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
  // Remember the failure; Run() and the destructor consult this flag.
  HadError = true;
  printMessage(L, SourceMgr::DK_Error, Msg, Range);
  printMacroInstantiations();
  return true;
}
1089 
enterIncludeFile(const std::string & Filename)1090 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1091   std::string IncludedFile;
1092   unsigned NewBuf =
1093       SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1094   if (!NewBuf)
1095     return true;
1096 
1097   CurBuffer = NewBuf;
1098   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1099   EndStatementAtEOFStack.push_back(true);
1100   return false;
1101 }
1102 
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1103 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1104                            bool EndStatementAtEOF) {
1105   CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1106   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1107                   Loc.getPointer(), EndStatementAtEOF);
1108 }
1109 
Lex()1110 const AsmToken &MasmParser::Lex() {
1111   if (Lexer.getTok().is(AsmToken::Error))
1112     Error(Lexer.getErrLoc(), Lexer.getErr());
1113 
1114   // if it's a end of statement with a comment in it
1115   if (getTok().is(AsmToken::EndOfStatement)) {
1116     // if this is a line comment output it.
1117     if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1118         getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1119       Out.addExplicitComment(Twine(getTok().getString()));
1120   }
1121 
1122   const AsmToken *tok = &Lexer.Lex();
1123 
1124   while (tok->is(AsmToken::Identifier)) {
1125     auto it = Variables.find(tok->getIdentifier().lower());
1126     const llvm::MCAsmMacro *M =
1127         getContext().lookupMacro(tok->getIdentifier().lower());
1128     if (it != Variables.end() && it->second.IsText) {
1129       // This is a textmacro; expand it in place.
1130       std::unique_ptr<MemoryBuffer> Instantiation =
1131           MemoryBuffer::getMemBufferCopy(it->second.TextValue,
1132                                          "<instantiation>");
1133 
1134       // Jump to the macro instantiation and prime the lexer.
1135       CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation),
1136                                             getTok().getEndLoc());
1137       Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1138                       /*EndStatementAtEOF=*/false);
1139       EndStatementAtEOFStack.push_back(false);
1140       tok = &Lexer.Lex();
1141     } else if (M && M->IsFunction && Lexer.peekTok().is(AsmToken::LParen)) {
1142       // This is a macro function invocation; expand it in place.
1143       const AsmToken MacroTok = *tok;
1144       tok = &Lexer.Lex();
1145       if (handleMacroInvocation(M, MacroTok.getLoc())) {
1146         Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier()));
1147         tok = &Lexer.Lex();
1148       }
1149       continue;
1150     } else {
1151       break;
1152     }
1153   }
1154 
1155   // Parse comments here to be deferred until end of next statement.
1156   while (tok->is(AsmToken::Comment)) {
1157     if (MAI.preserveAsmComments())
1158       Out.addExplicitComment(Twine(tok->getString()));
1159     tok = &Lexer.Lex();
1160   }
1161 
1162   // Recognize and bypass line continuations.
1163   while (tok->is(AsmToken::BackSlash) &&
1164          Lexer.peekTok().is(AsmToken::EndOfStatement)) {
1165     // Eat both the backslash and the end of statement.
1166     Lexer.Lex();
1167     tok = &Lexer.Lex();
1168   }
1169 
1170   if (tok->is(AsmToken::Eof)) {
1171     // If this is the end of an included file, pop the parent file off the
1172     // include stack.
1173     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1174     if (ParentIncludeLoc != SMLoc()) {
1175       EndStatementAtEOFStack.pop_back();
1176       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1177       return Lex();
1178     }
1179     EndStatementAtEOFStack.pop_back();
1180     assert(EndStatementAtEOFStack.empty());
1181   }
1182 
1183   return *tok;
1184 }
1185 
enabledGenDwarfForAssembly()1186 bool MasmParser::enabledGenDwarfForAssembly() {
1187   // Check whether the user specified -g.
1188   if (!getContext().getGenDwarfForAssembly())
1189     return false;
1190   // If we haven't encountered any .file directives (which would imply that
1191   // the assembler source was produced with debug info already) then emit one
1192   // describing the assembler source file itself.
1193   if (getContext().getGenDwarfFileNumber() == 0) {
1194     // Use the first #line directive for this, if any. It's preprocessed, so
1195     // there is no checksum, and of course no source directive.
1196     if (!FirstCppHashFilename.empty())
1197       getContext().setMCLineTableRootFile(/*CUID=*/0,
1198                                           getContext().getCompilationDir(),
1199                                           FirstCppHashFilename,
1200                                           /*Cksum=*/None, /*Source=*/None);
1201     const MCDwarfFile &RootFile =
1202         getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1203     getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1204         /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1205         RootFile.Checksum, RootFile.Source));
1206   }
1207   return true;
1208 }
1209 
/// Parse the whole input, statement by statement, and optionally finalize
/// the output stream.
///
/// \param NoInitialTextSection  when true, the initial
///        Out.InitSections(false) call is skipped.
/// \param NoFinalize  when true, the undefined-symbol checks are skipped and
///        Out.Finish() is not called.
/// \returns true if this parser or the MCContext recorded an error.
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
  // Create the initial section, if requested.
  if (!NoInitialTextSection)
    Out.InitSections(false);

  // Prime the lexer.
  Lex();

  HadError = false;
  // Snapshot the conditional-assembly state so unbalanced conditionals can be
  // detected after parsing.
  AsmCond StartingCondState = TheCondState;
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // If we are generating dwarf for assembly source files save the initial text
  // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
  // emitting any actual debug info yet and haven't had a chance to parse any
  // embedded .file directives.)
  if (getContext().getGenDwarfForAssembly()) {
    MCSection *Sec = getStreamer().getCurrentSectionOnly();
    if (!Sec->getBeginSymbol()) {
      MCSymbol *SectionStartSym = getContext().createTempSymbol();
      getStreamer().emitLabel(SectionStartSym);
      Sec->setBeginSymbol(SectionStartSym);
    }
    bool InsertResult = getContext().addGenDwarfSection(Sec);
    assert(InsertResult && ".text section should not have debug info yet");
    // Silence the unused-variable warning when asserts are compiled out.
    (void)InsertResult;
  }

  getTargetParser().onBeginOfFile();

  // While we have input, parse each statement.
  while (Lexer.isNot(AsmToken::Eof) ||
         SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
    // Skip through the EOF at the end of an inclusion.
    if (Lexer.is(AsmToken::Eof))
      Lex();

    ParseStatementInfo Info(&AsmStrRewrites);
    bool Parsed = parseStatement(Info, nullptr);

    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
    // for printing ErrMsg via Lex() only if no (presumably better) parser error
    // exists.
    if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
      Lex();
    }

    // parseStatement returned true so may need to emit an error.
    printPendingErrors();

    // Skipping to the next line if needed.
    if (Parsed && !getLexer().isAtStartOfStatement())
      eatToEndOfStatement();
  }

  getTargetParser().onEndOfFile();
  printPendingErrors();

  // All errors should have been emitted.
  assert(!hasPendingError() && "unexpected error from parseStatement");

  getTargetParser().flushPendingInstructions(getStreamer());

  // The conditional state must match the snapshot taken before parsing.
  if (TheCondState.TheCond != StartingCondState.TheCond ||
      TheCondState.Ignore != StartingCondState.Ignore)
    printError(getTok().getLoc(), "unmatched .ifs or .elses");
  // Check to see there are no empty DwarfFile slots.
  const auto &LineTables = getContext().getMCDwarfLineTables();
  if (!LineTables.empty()) {
    unsigned Index = 0;
    for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
      // Slot 0 is exempt from this check.
      if (File.Name.empty() && Index != 0)
        printError(getTok().getLoc(), "unassigned file number: " +
                                          Twine(Index) +
                                          " for .file directives");
      ++Index;
    }
  }

  // Check to see that all assembler local symbols were actually defined.
  // Targets that don't do subsections via symbols may not want this, though,
  // so conservatively exclude them. Only do this if we're finalizing, though,
  // as otherwise we won't necessarily have seen everything yet.
  if (!NoFinalize) {
    if (MAI.hasSubsectionsViaSymbols()) {
      for (const auto &TableEntry : getContext().getSymbols()) {
        MCSymbol *Sym = TableEntry.getValue();
        // Variable symbols may not be marked as defined, so check those
        // explicitly. If we know it's a variable, we have a definition for
        // the purposes of this check.
        if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
          // FIXME: We would really like to refer back to where the symbol was
          // first referenced for a source location. We need to add something
          // to track that. Currently, we just point to the end of the file.
          printError(getTok().getLoc(), "assembler local symbol '" +
                                            Sym->getName() + "' not defined");
      }
    }

    // Temporary symbols like the ones for directional jumps don't go in the
    // symbol table. They also need to be diagnosed in all (final) cases.
    for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
      if (std::get<2>(LocSym)->isUndefined()) {
        // Reset the state of any "# line file" directives we've seen to the
        // context as it was at the diagnostic site.
        CppHashInfo = std::get<1>(LocSym);
        printError(std::get<0>(LocSym), "directional label undefined");
      }
    }
  }

  // Finalize the output stream if there are no errors and if the client wants
  // us to.
  if (!HadError && !NoFinalize)
    Out.Finish(Lexer.getLoc());

  return HadError || getContext().hadError();
}
1328 
checkForValidSection()1329 bool MasmParser::checkForValidSection() {
1330   if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1331     Out.InitSections(false);
1332     return Error(getTok().getLoc(),
1333                  "expected section directive before assembly directive");
1334   }
1335   return false;
1336 }
1337 
1338 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1339 void MasmParser::eatToEndOfStatement() {
1340   while (Lexer.isNot(AsmToken::EndOfStatement)) {
1341     if (Lexer.is(AsmToken::Eof)) {
1342       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1343       if (ParentIncludeLoc == SMLoc()) {
1344         break;
1345       }
1346 
1347       EndStatementAtEOFStack.pop_back();
1348       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1349     }
1350 
1351     Lexer.Lex();
1352   }
1353 
1354   // Eat EOL.
1355   if (Lexer.is(AsmToken::EndOfStatement))
1356     Lexer.Lex();
1357 }
1358 
1359 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1360 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1361   SmallVector<StringRef, 1> Refs;
1362   const char *Start = getTok().getLoc().getPointer();
1363   while (Lexer.isNot(EndTok)) {
1364     if (Lexer.is(AsmToken::Eof)) {
1365       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1366       if (ParentIncludeLoc == SMLoc()) {
1367         break;
1368       }
1369       Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1370 
1371       EndStatementAtEOFStack.pop_back();
1372       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1373       Lexer.Lex();
1374       Start = getTok().getLoc().getPointer();
1375     } else {
1376       Lexer.Lex();
1377     }
1378   }
1379   Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1380   return Refs;
1381 }
1382 
parseStringTo(AsmToken::TokenKind EndTok)1383 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1384   SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1385   std::string Str;
1386   for (StringRef S : Refs) {
1387     Str.append(S.str());
1388   }
1389   return Str;
1390 }
1391 
parseStringToEndOfStatement()1392 StringRef MasmParser::parseStringToEndOfStatement() {
1393   const char *Start = getTok().getLoc().getPointer();
1394 
1395   while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1396     Lexer.Lex();
1397 
1398   const char *End = getTok().getLoc().getPointer();
1399   return StringRef(Start, End - Start);
1400 }
1401 
1402 /// Parse a paren expression and return it.
1403 /// NOTE: This assumes the leading '(' has already been consumed.
1404 ///
1405 /// parenexpr ::= expr)
1406 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1407 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1408   if (parseExpression(Res))
1409     return true;
1410   if (Lexer.isNot(AsmToken::RParen))
1411     return TokError("expected ')' in parentheses expression");
1412   EndLoc = Lexer.getTok().getEndLoc();
1413   Lex();
1414   return false;
1415 }
1416 
1417 /// Parse a bracket expression and return it.
1418 /// NOTE: This assumes the leading '[' has already been consumed.
1419 ///
1420 /// bracketexpr ::= expr]
1421 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1422 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1423   if (parseExpression(Res))
1424     return true;
1425   EndLoc = getTok().getEndLoc();
1426   if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1427     return true;
1428   return false;
1429 }
1430 
1431 /// Parse a primary expression and return it.
1432 ///  primaryexpr ::= (parenexpr
1433 ///  primaryexpr ::= symbol
1434 ///  primaryexpr ::= number
1435 ///  primaryexpr ::= '.'
1436 ///  primaryexpr ::= ~,+,-,'not' primaryexpr
1437 ///  primaryexpr ::= string
1438 ///          (a string is interpreted as a 64-bit number in big-endian base-256)
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc,AsmTypeInfo * TypeInfo)1439 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1440                                   AsmTypeInfo *TypeInfo) {
1441   SMLoc FirstTokenLoc = getLexer().getLoc();
1442   AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1443   switch (FirstTokenKind) {
1444   default:
1445     return TokError("unknown token in expression");
1446   // If we have an error assume that we've already handled it.
1447   case AsmToken::Error:
1448     return true;
1449   case AsmToken::Exclaim:
1450     Lex(); // Eat the operator.
1451     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1452       return true;
1453     Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1454     return false;
1455   case AsmToken::Dollar:
1456   case AsmToken::At:
1457   case AsmToken::Identifier: {
1458     StringRef Identifier;
1459     if (parseIdentifier(Identifier)) {
1460       // We may have failed but $ may be a valid token.
1461       if (getTok().is(AsmToken::Dollar)) {
1462         if (Lexer.getMAI().getDollarIsPC()) {
1463           Lex();
1464           // This is a '$' reference, which references the current PC.  Emit a
1465           // temporary label to the streamer and refer to it.
1466           MCSymbol *Sym = Ctx.createTempSymbol();
1467           Out.emitLabel(Sym);
1468           Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1469                                         getContext());
1470           EndLoc = FirstTokenLoc;
1471           return false;
1472         }
1473         return Error(FirstTokenLoc, "invalid token in expression");
1474       }
1475     }
1476     // Parse named bitwise negation.
1477     if (Identifier.equals_lower("not")) {
1478       if (parsePrimaryExpr(Res, EndLoc, nullptr))
1479         return true;
1480       Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1481       return false;
1482     }
1483     // Parse symbol variant.
1484     std::pair<StringRef, StringRef> Split;
1485     if (!MAI.useParensForSymbolVariant()) {
1486       if (FirstTokenKind == AsmToken::String) {
1487         if (Lexer.is(AsmToken::At)) {
1488           Lex(); // eat @
1489           SMLoc AtLoc = getLexer().getLoc();
1490           StringRef VName;
1491           if (parseIdentifier(VName))
1492             return Error(AtLoc, "expected symbol variant after '@'");
1493 
1494           Split = std::make_pair(Identifier, VName);
1495         }
1496       } else {
1497         Split = Identifier.split('@');
1498       }
1499     } else if (Lexer.is(AsmToken::LParen)) {
1500       Lex(); // eat '('.
1501       StringRef VName;
1502       parseIdentifier(VName);
1503       // eat ')'.
1504       if (parseToken(AsmToken::RParen,
1505                      "unexpected token in variant, expected ')'"))
1506         return true;
1507       Split = std::make_pair(Identifier, VName);
1508     }
1509 
1510     EndLoc = SMLoc::getFromPointer(Identifier.end());
1511 
1512     // This is a symbol reference.
1513     StringRef SymbolName = Identifier;
1514     if (SymbolName.empty())
1515       return Error(getLexer().getLoc(), "expected a symbol reference");
1516 
1517     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1518 
1519     // Look up the symbol variant if used.
1520     if (!Split.second.empty()) {
1521       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1522       if (Variant != MCSymbolRefExpr::VK_Invalid) {
1523         SymbolName = Split.first;
1524       } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1525         Variant = MCSymbolRefExpr::VK_None;
1526       } else {
1527         return Error(SMLoc::getFromPointer(Split.second.begin()),
1528                      "invalid variant '" + Split.second + "'");
1529       }
1530     }
1531 
1532     // Find the field offset if used.
1533     AsmFieldInfo Info;
1534     Split = SymbolName.split('.');
1535     if (Split.second.empty()) {
1536     } else {
1537       SymbolName = Split.first;
1538       if (lookUpField(SymbolName, Split.second, Info)) {
1539         std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1540         StringRef Base = BaseMember.first, Member = BaseMember.second;
1541         lookUpField(Base, Member, Info);
1542       } else if (Structs.count(SymbolName.lower())) {
1543         // This is actually a reference to a field offset.
1544         Res = MCConstantExpr::create(Info.Offset, getContext());
1545         return false;
1546       }
1547     }
1548 
1549     MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1550     if (!Sym) {
1551       // Variables use case-insensitive symbol names; if this is a variable, we
1552       // find the symbol using its canonical name.
1553       auto VarIt = Variables.find(SymbolName.lower());
1554       if (VarIt != Variables.end())
1555         SymbolName = VarIt->second.Name;
1556       Sym = getContext().getOrCreateSymbol(SymbolName);
1557     }
1558 
1559     // If this is an absolute variable reference, substitute it now to preserve
1560     // semantics in the face of reassignment.
1561     if (Sym->isVariable()) {
1562       auto V = Sym->getVariableValue(/*SetUsed*/ false);
1563       bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1564       if (auto TV = dyn_cast<MCTargetExpr>(V))
1565         DoInline = TV->inlineAssignedExpr();
1566       if (DoInline) {
1567         if (Variant)
1568           return Error(EndLoc, "unexpected modifier on variable reference");
1569         Res = Sym->getVariableValue(/*SetUsed*/ false);
1570         return false;
1571       }
1572     }
1573 
1574     // Otherwise create a symbol ref.
1575     const MCExpr *SymRef =
1576         MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1577     if (Info.Offset) {
1578       Res = MCBinaryExpr::create(
1579           MCBinaryExpr::Add, SymRef,
1580           MCConstantExpr::create(Info.Offset, getContext()), getContext());
1581     } else {
1582       Res = SymRef;
1583     }
1584     if (TypeInfo) {
1585       if (Info.Type.Name.empty()) {
1586         auto TypeIt = KnownType.find(Identifier.lower());
1587         if (TypeIt != KnownType.end()) {
1588           Info.Type = TypeIt->second;
1589         }
1590       }
1591 
1592       *TypeInfo = Info.Type;
1593     }
1594     return false;
1595   }
1596   case AsmToken::BigNum:
1597     return TokError("literal value out of range for directive");
1598   case AsmToken::Integer: {
1599     SMLoc Loc = getTok().getLoc();
1600     int64_t IntVal = getTok().getIntVal();
1601     Res = MCConstantExpr::create(IntVal, getContext());
1602     EndLoc = Lexer.getTok().getEndLoc();
1603     Lex(); // Eat token.
1604     // Look for 'b' or 'f' following an Integer as a directional label.
1605     if (Lexer.getKind() == AsmToken::Identifier) {
1606       StringRef IDVal = getTok().getString();
1607       // Look up the symbol variant if used.
1608       std::pair<StringRef, StringRef> Split = IDVal.split('@');
1609       MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1610       if (Split.first.size() != IDVal.size()) {
1611         Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1612         if (Variant == MCSymbolRefExpr::VK_Invalid)
1613           return TokError("invalid variant '" + Split.second + "'");
1614         IDVal = Split.first;
1615       }
1616       if (IDVal == "f" || IDVal == "b") {
1617         MCSymbol *Sym =
1618             Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
1619         Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
1620         if (IDVal == "b" && Sym->isUndefined())
1621           return Error(Loc, "directional label undefined");
1622         DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
1623         EndLoc = Lexer.getTok().getEndLoc();
1624         Lex(); // Eat identifier.
1625       }
1626     }
1627     return false;
1628   }
1629   case AsmToken::String: {
1630     // MASM strings (used as constants) are interpreted as big-endian base-256.
1631     SMLoc ValueLoc = getTok().getLoc();
1632     std::string Value;
1633     if (parseEscapedString(Value))
1634       return true;
1635     if (Value.size() > 8)
1636       return Error(ValueLoc, "literal value out of range");
1637     uint64_t IntValue = 0;
1638     for (const unsigned char CharVal : Value)
1639       IntValue = (IntValue << 8) | CharVal;
1640     Res = MCConstantExpr::create(IntValue, getContext());
1641     return false;
1642   }
1643   case AsmToken::Real: {
1644     APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1645     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1646     Res = MCConstantExpr::create(IntVal, getContext());
1647     EndLoc = Lexer.getTok().getEndLoc();
1648     Lex(); // Eat token.
1649     return false;
1650   }
1651   case AsmToken::Dot: {
1652     // This is a '.' reference, which references the current PC.  Emit a
1653     // temporary label to the streamer and refer to it.
1654     MCSymbol *Sym = Ctx.createTempSymbol();
1655     Out.emitLabel(Sym);
1656     Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1657     EndLoc = Lexer.getTok().getEndLoc();
1658     Lex(); // Eat identifier.
1659     return false;
1660   }
1661   case AsmToken::LParen:
1662     Lex(); // Eat the '('.
1663     return parseParenExpr(Res, EndLoc);
1664   case AsmToken::LBrac:
1665     if (!PlatformParser->HasBracketExpressions())
1666       return TokError("brackets expression not supported on this target");
1667     Lex(); // Eat the '['.
1668     return parseBracketExpr(Res, EndLoc);
1669   case AsmToken::Minus:
1670     Lex(); // Eat the operator.
1671     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1672       return true;
1673     Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1674     return false;
1675   case AsmToken::Plus:
1676     Lex(); // Eat the operator.
1677     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1678       return true;
1679     Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1680     return false;
1681   case AsmToken::Tilde:
1682     Lex(); // Eat the operator.
1683     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1684       return true;
1685     Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1686     return false;
1687   // MIPS unary expression operators. The lexer won't generate these tokens if
1688   // MCAsmInfo::HasMipsExpressions is false for the target.
1689   case AsmToken::PercentCall16:
1690   case AsmToken::PercentCall_Hi:
1691   case AsmToken::PercentCall_Lo:
1692   case AsmToken::PercentDtprel_Hi:
1693   case AsmToken::PercentDtprel_Lo:
1694   case AsmToken::PercentGot:
1695   case AsmToken::PercentGot_Disp:
1696   case AsmToken::PercentGot_Hi:
1697   case AsmToken::PercentGot_Lo:
1698   case AsmToken::PercentGot_Ofst:
1699   case AsmToken::PercentGot_Page:
1700   case AsmToken::PercentGottprel:
1701   case AsmToken::PercentGp_Rel:
1702   case AsmToken::PercentHi:
1703   case AsmToken::PercentHigher:
1704   case AsmToken::PercentHighest:
1705   case AsmToken::PercentLo:
1706   case AsmToken::PercentNeg:
1707   case AsmToken::PercentPcrel_Hi:
1708   case AsmToken::PercentPcrel_Lo:
1709   case AsmToken::PercentTlsgd:
1710   case AsmToken::PercentTlsldm:
1711   case AsmToken::PercentTprel_Hi:
1712   case AsmToken::PercentTprel_Lo:
1713     Lex(); // Eat the operator.
1714     if (Lexer.isNot(AsmToken::LParen))
1715       return TokError("expected '(' after operator");
1716     Lex(); // Eat the operator.
1717     if (parseExpression(Res, EndLoc))
1718       return true;
1719     if (Lexer.isNot(AsmToken::RParen))
1720       return TokError("expected ')'");
1721     Lex(); // Eat the operator.
1722     Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1723     return !Res;
1724   }
1725 }
1726 
parseExpression(const MCExpr * & Res)1727 bool MasmParser::parseExpression(const MCExpr *&Res) {
1728   SMLoc EndLoc;
1729   return parseExpression(Res, EndLoc);
1730 }
1731 
1732 /// This function checks if the next token is <string> type or arithmetic.
1733 /// string that begin with character '<' must end with character '>'.
1734 /// otherwise it is arithmetics.
1735 /// If the function returns a 'true' value,
1736 /// the End argument will be filled with the last location pointed to the '>'
1737 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1738 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1739   assert((StrLoc.getPointer() != nullptr) &&
1740          "Argument to the function cannot be a NULL value");
1741   const char *CharPtr = StrLoc.getPointer();
1742   while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1743          (*CharPtr != '\0')) {
1744     if (*CharPtr == '!')
1745       CharPtr++;
1746     CharPtr++;
1747   }
1748   if (*CharPtr == '>') {
1749     EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1750     return true;
1751   }
1752   return false;
1753 }
1754 
1755 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1756 static std::string angleBracketString(StringRef BracketContents) {
1757   std::string Res;
1758   for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1759     if (BracketContents[Pos] == '!')
1760       Pos++;
1761     Res += BracketContents[Pos];
1762   }
1763   return Res;
1764 }
1765 
1766 /// Parse an expression and return it.
1767 ///
1768 ///  expr ::= expr &&,|| expr               -> lowest.
1769 ///  expr ::= expr |,^,&,! expr
1770 ///  expr ::= expr ==,!=,<>,<,<=,>,>= expr
1771 ///  expr ::= expr <<,>> expr
1772 ///  expr ::= expr +,- expr
1773 ///  expr ::= expr *,/,% expr               -> highest.
1774 ///  expr ::= primaryexpr
1775 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1776 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1777   // Parse the expression.
1778   Res = nullptr;
1779   if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1780       parseBinOpRHS(1, Res, EndLoc))
1781     return true;
1782 
1783   // Try to constant fold it up front, if possible. Do not exploit
1784   // assembler here.
1785   int64_t Value;
1786   if (Res->evaluateAsAbsolute(Value))
1787     Res = MCConstantExpr::create(Value, getContext());
1788 
1789   return false;
1790 }
1791 
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1792 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1793   Res = nullptr;
1794   return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1795 }
1796 
parseParenExprOfDepth(unsigned ParenDepth,const MCExpr * & Res,SMLoc & EndLoc)1797 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1798                                        SMLoc &EndLoc) {
1799   if (parseParenExpr(Res, EndLoc))
1800     return true;
1801 
1802   for (; ParenDepth > 0; --ParenDepth) {
1803     if (parseBinOpRHS(1, Res, EndLoc))
1804       return true;
1805 
1806     // We don't Lex() the last RParen.
1807     // This is the same behavior as parseParenExpression().
1808     if (ParenDepth - 1 > 0) {
1809       EndLoc = getTok().getEndLoc();
1810       if (parseToken(AsmToken::RParen,
1811                      "expected ')' in parentheses expression"))
1812         return true;
1813     }
1814   }
1815   return false;
1816 }
1817 
parseAbsoluteExpression(int64_t & Res)1818 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1819   const MCExpr *Expr;
1820 
1821   SMLoc StartLoc = Lexer.getLoc();
1822   if (parseExpression(Expr))
1823     return true;
1824 
1825   if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1826     return Error(StartLoc, "expected absolute expression");
1827 
1828   return false;
1829 }
1830 
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1831 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1832                                       MCBinaryExpr::Opcode &Kind,
1833                                       bool ShouldUseLogicalShr,
1834                                       bool EndExpressionAtGreater) {
1835   switch (K) {
1836   default:
1837     return 0; // not a binop.
1838 
1839   // Lowest Precedence: &&, ||
1840   case AsmToken::AmpAmp:
1841     Kind = MCBinaryExpr::LAnd;
1842     return 2;
1843   case AsmToken::PipePipe:
1844     Kind = MCBinaryExpr::LOr;
1845     return 1;
1846 
1847   // Low Precedence: ==, !=, <>, <, <=, >, >=
1848   case AsmToken::EqualEqual:
1849     Kind = MCBinaryExpr::EQ;
1850     return 3;
1851   case AsmToken::ExclaimEqual:
1852   case AsmToken::LessGreater:
1853     Kind = MCBinaryExpr::NE;
1854     return 3;
1855   case AsmToken::Less:
1856     Kind = MCBinaryExpr::LT;
1857     return 3;
1858   case AsmToken::LessEqual:
1859     Kind = MCBinaryExpr::LTE;
1860     return 3;
1861   case AsmToken::Greater:
1862     if (EndExpressionAtGreater)
1863       return 0;
1864     Kind = MCBinaryExpr::GT;
1865     return 3;
1866   case AsmToken::GreaterEqual:
1867     Kind = MCBinaryExpr::GTE;
1868     return 3;
1869 
1870   // Low Intermediate Precedence: +, -
1871   case AsmToken::Plus:
1872     Kind = MCBinaryExpr::Add;
1873     return 4;
1874   case AsmToken::Minus:
1875     Kind = MCBinaryExpr::Sub;
1876     return 4;
1877 
1878   // High Intermediate Precedence: |, &, ^
1879   case AsmToken::Pipe:
1880     Kind = MCBinaryExpr::Or;
1881     return 5;
1882   case AsmToken::Caret:
1883     Kind = MCBinaryExpr::Xor;
1884     return 5;
1885   case AsmToken::Amp:
1886     Kind = MCBinaryExpr::And;
1887     return 5;
1888 
1889   // Highest Precedence: *, /, %, <<, >>
1890   case AsmToken::Star:
1891     Kind = MCBinaryExpr::Mul;
1892     return 6;
1893   case AsmToken::Slash:
1894     Kind = MCBinaryExpr::Div;
1895     return 6;
1896   case AsmToken::Percent:
1897     Kind = MCBinaryExpr::Mod;
1898     return 6;
1899   case AsmToken::LessLess:
1900     Kind = MCBinaryExpr::Shl;
1901     return 6;
1902   case AsmToken::GreaterGreater:
1903     if (EndExpressionAtGreater)
1904       return 0;
1905     Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
1906     return 6;
1907   }
1908 }
1909 
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)1910 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
1911                                         MCBinaryExpr::Opcode &Kind) {
1912   bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
1913   return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
1914                                AngleBracketDepth > 0);
1915 }
1916 
1917 /// Parse all binary operators with precedence >= 'Precedence'.
1918 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)1919 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
1920                                SMLoc &EndLoc) {
1921   SMLoc StartLoc = Lexer.getLoc();
1922   while (true) {
1923     AsmToken::TokenKind TokKind = Lexer.getKind();
1924     if (Lexer.getKind() == AsmToken::Identifier) {
1925       TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
1926                     .CaseLower("and", AsmToken::Amp)
1927                     .CaseLower("not", AsmToken::Exclaim)
1928                     .CaseLower("or", AsmToken::Pipe)
1929                     .CaseLower("eq", AsmToken::EqualEqual)
1930                     .CaseLower("ne", AsmToken::ExclaimEqual)
1931                     .CaseLower("lt", AsmToken::Less)
1932                     .CaseLower("le", AsmToken::LessEqual)
1933                     .CaseLower("gt", AsmToken::Greater)
1934                     .CaseLower("ge", AsmToken::GreaterEqual)
1935                     .Default(TokKind);
1936     }
1937     MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
1938     unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
1939 
1940     // If the next token is lower precedence than we are allowed to eat, return
1941     // successfully with what we ate already.
1942     if (TokPrec < Precedence)
1943       return false;
1944 
1945     Lex();
1946 
1947     // Eat the next primary expression.
1948     const MCExpr *RHS;
1949     if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
1950       return true;
1951 
1952     // If BinOp binds less tightly with RHS than the operator after RHS, let
1953     // the pending operator take RHS as its LHS.
1954     MCBinaryExpr::Opcode Dummy;
1955     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
1956     if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
1957       return true;
1958 
1959     // Merge LHS and RHS according to operator.
1960     Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
1961   }
1962 }
1963 
1964 /// ParseStatement:
1965 ///   ::= % statement
1966 ///   ::= EndOfStatement
1967 ///   ::= Label* Directive ...Operands... EndOfStatement
1968 ///   ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)1969 bool MasmParser::parseStatement(ParseStatementInfo &Info,
1970                                 MCAsmParserSemaCallback *SI) {
1971   assert(!hasPendingError() && "parseStatement started with pending error");
1972   // Eat initial spaces and comments.
1973   while (Lexer.is(AsmToken::Space))
1974     Lex();
1975   if (Lexer.is(AsmToken::EndOfStatement)) {
1976     // If this is a line comment we can drop it safely.
1977     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
1978         getTok().getString().front() == '\n')
1979       Out.AddBlankLine();
1980     Lex();
1981     return false;
1982   }
1983 
1984   // If preceded by an expansion operator, first expand all text macros and
1985   // macro functions.
1986   if (getTok().is(AsmToken::Percent)) {
1987     SMLoc ExpansionLoc = getTok().getLoc();
1988     if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
1989       return true;
1990   }
1991 
1992   // Statements always start with an identifier, unless we're dealing with a
1993   // processor directive (.386, .686, etc.) that lexes as a real.
1994   AsmToken ID = getTok();
1995   SMLoc IDLoc = ID.getLoc();
1996   StringRef IDVal;
1997   int64_t LocalLabelVal = -1;
1998   if (Lexer.is(AsmToken::HashDirective))
1999     return parseCppHashLineFilenameComment(IDLoc);
2000   // Allow an integer followed by a ':' as a directional local label.
2001   if (Lexer.is(AsmToken::Integer)) {
2002     LocalLabelVal = getTok().getIntVal();
2003     if (LocalLabelVal < 0) {
2004       if (!TheCondState.Ignore) {
2005         Lex(); // always eat a token
2006         return Error(IDLoc, "unexpected token at start of statement");
2007       }
2008       IDVal = "";
2009     } else {
2010       IDVal = getTok().getString();
2011       Lex(); // Consume the integer token to be used as an identifier token.
2012       if (Lexer.getKind() != AsmToken::Colon) {
2013         if (!TheCondState.Ignore) {
2014           Lex(); // always eat a token
2015           return Error(IDLoc, "unexpected token at start of statement");
2016         }
2017       }
2018     }
2019   } else if (Lexer.is(AsmToken::Dot)) {
2020     // Treat '.' as a valid identifier in this context.
2021     Lex();
2022     IDVal = ".";
2023   } else if (Lexer.is(AsmToken::LCurly)) {
2024     // Treat '{' as a valid identifier in this context.
2025     Lex();
2026     IDVal = "{";
2027 
2028   } else if (Lexer.is(AsmToken::RCurly)) {
2029     // Treat '}' as a valid identifier in this context.
2030     Lex();
2031     IDVal = "}";
2032   } else if (Lexer.is(AsmToken::Star) &&
2033              getTargetParser().starIsStartOfStatement()) {
2034     // Accept '*' as a valid start of statement.
2035     Lex();
2036     IDVal = "*";
2037   } else if (Lexer.is(AsmToken::Real)) {
2038     // Treat ".<number>" as a valid identifier in this context.
2039     IDVal = getTok().getString();
2040     Lex(); // always eat a token
2041     if (!IDVal.startswith("."))
2042       return Error(IDLoc, "unexpected token at start of statement");
2043   } else if (Lexer.is(AsmToken::Identifier) &&
2044              getTok().getString().equals_lower("echo")) {
2045     // Intercept echo early to avoid lexical substitution in its message, and
2046     // delegate all handling to the appropriate function.
2047     return parseDirectiveEcho();
2048   } else if (parseIdentifier(IDVal)) {
2049     if (!TheCondState.Ignore) {
2050       Lex(); // always eat a token
2051       return Error(IDLoc, "unexpected token at start of statement");
2052     }
2053     IDVal = "";
2054   }
2055 
2056   // Handle conditional assembly here before checking for skipping.  We
2057   // have to do this so that .endif isn't skipped in a ".if 0" block for
2058   // example.
2059   StringMap<DirectiveKind>::const_iterator DirKindIt =
2060       DirectiveKindMap.find(IDVal.lower());
2061   DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2062                               ? DK_NO_DIRECTIVE
2063                               : DirKindIt->getValue();
2064   switch (DirKind) {
2065   default:
2066     break;
2067   case DK_IF:
2068   case DK_IFE:
2069     return parseDirectiveIf(IDLoc, DirKind);
2070   case DK_IFB:
2071     return parseDirectiveIfb(IDLoc, true);
2072   case DK_IFNB:
2073     return parseDirectiveIfb(IDLoc, false);
2074   case DK_IFDEF:
2075     return parseDirectiveIfdef(IDLoc, true);
2076   case DK_IFNDEF:
2077     return parseDirectiveIfdef(IDLoc, false);
2078   case DK_IFDIF:
2079     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2080                                /*CaseInsensitive=*/false);
2081   case DK_IFDIFI:
2082     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2083                                /*CaseInsensitive=*/true);
2084   case DK_IFIDN:
2085     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2086                                /*CaseInsensitive=*/false);
2087   case DK_IFIDNI:
2088     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2089                                /*CaseInsensitive=*/true);
2090   case DK_ELSEIF:
2091   case DK_ELSEIFE:
2092     return parseDirectiveElseIf(IDLoc, DirKind);
2093   case DK_ELSEIFB:
2094     return parseDirectiveElseIfb(IDLoc, true);
2095   case DK_ELSEIFNB:
2096     return parseDirectiveElseIfb(IDLoc, false);
2097   case DK_ELSEIFDEF:
2098     return parseDirectiveElseIfdef(IDLoc, true);
2099   case DK_ELSEIFNDEF:
2100     return parseDirectiveElseIfdef(IDLoc, false);
2101   case DK_ELSEIFDIF:
2102     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2103                                    /*CaseInsensitive=*/false);
2104   case DK_ELSEIFDIFI:
2105     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2106                                    /*CaseInsensitive=*/true);
2107   case DK_ELSEIFIDN:
2108     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2109                                    /*CaseInsensitive=*/false);
2110   case DK_ELSEIFIDNI:
2111     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2112                                    /*CaseInsensitive=*/true);
2113   case DK_ELSE:
2114     return parseDirectiveElse(IDLoc);
2115   case DK_ENDIF:
2116     return parseDirectiveEndIf(IDLoc);
2117   }
2118 
2119   // Ignore the statement if in the middle of inactive conditional
2120   // (e.g. ".if 0").
2121   if (TheCondState.Ignore) {
2122     eatToEndOfStatement();
2123     return false;
2124   }
2125 
2126   // FIXME: Recurse on local labels?
2127 
2128   // See what kind of statement we have.
2129   switch (Lexer.getKind()) {
2130   case AsmToken::Colon: {
2131     if (!getTargetParser().isLabel(ID))
2132       break;
2133     if (checkForValidSection())
2134       return true;
2135 
2136     // identifier ':'   -> Label.
2137     Lex();
2138 
2139     // Diagnose attempt to use '.' as a label.
2140     if (IDVal == ".")
2141       return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2142 
2143     // Diagnose attempt to use a variable as a label.
2144     //
2145     // FIXME: Diagnostics. Note the location of the definition as a label.
2146     // FIXME: This doesn't diagnose assignment to a symbol which has been
2147     // implicitly marked as external.
2148     MCSymbol *Sym;
2149     if (LocalLabelVal == -1) {
2150       if (ParsingMSInlineAsm && SI) {
2151         StringRef RewrittenLabel =
2152             SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2153         assert(!RewrittenLabel.empty() &&
2154                "We should have an internal name here.");
2155         Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2156                                        RewrittenLabel);
2157         IDVal = RewrittenLabel;
2158       }
2159       Sym = getContext().getOrCreateSymbol(IDVal);
2160     } else
2161       Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
2162     // End of Labels should be treated as end of line for lexing
2163     // purposes but that information is not available to the Lexer who
2164     // does not understand Labels. This may cause us to see a Hash
2165     // here instead of a preprocessor line comment.
2166     if (getTok().is(AsmToken::Hash)) {
2167       std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2168       Lexer.Lex();
2169       Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2170     }
2171 
2172     // Consume any end of statement token, if present, to avoid spurious
2173     // AddBlankLine calls().
2174     if (getTok().is(AsmToken::EndOfStatement)) {
2175       Lex();
2176     }
2177 
2178     getTargetParser().doBeforeLabelEmit(Sym);
2179 
2180     // Emit the label.
2181     if (!getTargetParser().isParsingMSInlineAsm())
2182       Out.emitLabel(Sym, IDLoc);
2183 
2184     // If we are generating dwarf for assembly source files then gather the
2185     // info to make a dwarf label entry for this label if needed.
2186     if (enabledGenDwarfForAssembly())
2187       MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2188                                  IDLoc);
2189 
2190     getTargetParser().onLabelParsed(Sym);
2191 
2192     return false;
2193   }
2194 
2195   default: // Normal instruction or directive.
2196     break;
2197   }
2198 
2199   // If macros are enabled, check to see if this is a macro instantiation.
2200   if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2201     return handleMacroEntry(M, IDLoc);
2202   }
2203 
2204   // Otherwise, we have a normal instruction or directive.
2205 
2206   if (DirKind != DK_NO_DIRECTIVE) {
2207     // There are several entities interested in parsing directives:
2208     //
2209     // 1. Asm parser extensions. For example, platform-specific parsers
2210     //    (like the ELF parser) register themselves as extensions.
2211     // 2. The target-specific assembly parser. Some directives are target
2212     //    specific or may potentially behave differently on certain targets.
2213     // 3. The generic directive parser implemented by this class. These are
2214     //    all the directives that behave in a target and platform independent
2215     //    manner, or at least have a default behavior that's shared between
2216     //    all targets and platforms.
2217 
2218     getTargetParser().flushPendingInstructions(getStreamer());
2219 
2220     // Special-case handling of structure-end directives at higher priority,
2221     // since ENDS is overloaded as a segment-end directive.
2222     if (IDVal.equals_lower("ends") && StructInProgress.size() > 1 &&
2223         getTok().is(AsmToken::EndOfStatement)) {
2224       return parseDirectiveNestedEnds();
2225     }
2226 
2227     // First, check the extension directive map to see if any extension has
2228     // registered itself to parse this directive.
2229     std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2230         ExtensionDirectiveMap.lookup(IDVal.lower());
2231     if (Handler.first)
2232       return (*Handler.second)(Handler.first, IDVal, IDLoc);
2233 
2234     // Next, let the target-specific assembly parser try.
2235     SMLoc StartTokLoc = getTok().getLoc();
2236     bool TPDirectiveReturn =
2237         ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2238 
2239     if (hasPendingError())
2240       return true;
2241     // Currently the return value should be true if we are
2242     // uninterested but as this is at odds with the standard parsing
2243     // convention (return true = error) we have instances of a parsed
2244     // directive that fails returning true as an error. Catch these
2245     // cases as best as possible errors here.
2246     if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2247       return true;
2248     // Return if we did some parsing or believe we succeeded.
2249     if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2250       return false;
2251 
2252     // Finally, if no one else is interested in this directive, it must be
2253     // generic and familiar to this class.
2254     switch (DirKind) {
2255     default:
2256       break;
2257     case DK_ASCII:
2258       return parseDirectiveAscii(IDVal, false);
2259     case DK_ASCIZ:
2260     case DK_STRING:
2261       return parseDirectiveAscii(IDVal, true);
2262     case DK_BYTE:
2263     case DK_SBYTE:
2264     case DK_DB:
2265       return parseDirectiveValue(IDVal, 1);
2266     case DK_WORD:
2267     case DK_SWORD:
2268     case DK_DW:
2269       return parseDirectiveValue(IDVal, 2);
2270     case DK_DWORD:
2271     case DK_SDWORD:
2272     case DK_DD:
2273       return parseDirectiveValue(IDVal, 4);
2274     case DK_FWORD:
2275     case DK_DF:
2276       return parseDirectiveValue(IDVal, 6);
2277     case DK_QWORD:
2278     case DK_SQWORD:
2279     case DK_DQ:
2280       return parseDirectiveValue(IDVal, 8);
2281     case DK_REAL4:
2282       return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2283     case DK_REAL8:
2284       return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2285     case DK_REAL10:
2286       return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2287     case DK_STRUCT:
2288     case DK_UNION:
2289       return parseDirectiveNestedStruct(IDVal, DirKind);
2290     case DK_ENDS:
2291       return parseDirectiveNestedEnds();
2292     case DK_ALIGN:
2293       return parseDirectiveAlign();
2294     case DK_ORG:
2295       return parseDirectiveOrg();
2296     case DK_EXTERN:
2297       eatToEndOfStatement(); // .extern is the default, ignore it.
2298       return false;
2299     case DK_PUBLIC:
2300       return parseDirectiveSymbolAttribute(MCSA_Global);
2301     case DK_COMM:
2302       return parseDirectiveComm(/*IsLocal=*/false);
2303     case DK_COMMENT:
2304       return parseDirectiveComment(IDLoc);
2305     case DK_INCLUDE:
2306       return parseDirectiveInclude();
2307     case DK_REPEAT:
2308       return parseDirectiveRepeat(IDLoc, IDVal);
2309     case DK_WHILE:
2310       return parseDirectiveWhile(IDLoc);
2311     case DK_FOR:
2312       return parseDirectiveFor(IDLoc, IDVal);
2313     case DK_FORC:
2314       return parseDirectiveForc(IDLoc, IDVal);
2315     case DK_FILE:
2316       return parseDirectiveFile(IDLoc);
2317     case DK_LINE:
2318       return parseDirectiveLine();
2319     case DK_LOC:
2320       return parseDirectiveLoc();
2321     case DK_STABS:
2322       return parseDirectiveStabs();
2323     case DK_CV_FILE:
2324       return parseDirectiveCVFile();
2325     case DK_CV_FUNC_ID:
2326       return parseDirectiveCVFuncId();
2327     case DK_CV_INLINE_SITE_ID:
2328       return parseDirectiveCVInlineSiteId();
2329     case DK_CV_LOC:
2330       return parseDirectiveCVLoc();
2331     case DK_CV_LINETABLE:
2332       return parseDirectiveCVLinetable();
2333     case DK_CV_INLINE_LINETABLE:
2334       return parseDirectiveCVInlineLinetable();
2335     case DK_CV_DEF_RANGE:
2336       return parseDirectiveCVDefRange();
2337     case DK_CV_STRING:
2338       return parseDirectiveCVString();
2339     case DK_CV_STRINGTABLE:
2340       return parseDirectiveCVStringTable();
2341     case DK_CV_FILECHECKSUMS:
2342       return parseDirectiveCVFileChecksums();
2343     case DK_CV_FILECHECKSUM_OFFSET:
2344       return parseDirectiveCVFileChecksumOffset();
2345     case DK_CV_FPO_DATA:
2346       return parseDirectiveCVFPOData();
2347     case DK_CFI_SECTIONS:
2348       return parseDirectiveCFISections();
2349     case DK_CFI_STARTPROC:
2350       return parseDirectiveCFIStartProc();
2351     case DK_CFI_ENDPROC:
2352       return parseDirectiveCFIEndProc();
2353     case DK_CFI_DEF_CFA:
2354       return parseDirectiveCFIDefCfa(IDLoc);
2355     case DK_CFI_DEF_CFA_OFFSET:
2356       return parseDirectiveCFIDefCfaOffset();
2357     case DK_CFI_ADJUST_CFA_OFFSET:
2358       return parseDirectiveCFIAdjustCfaOffset();
2359     case DK_CFI_DEF_CFA_REGISTER:
2360       return parseDirectiveCFIDefCfaRegister(IDLoc);
2361     case DK_CFI_OFFSET:
2362       return parseDirectiveCFIOffset(IDLoc);
2363     case DK_CFI_REL_OFFSET:
2364       return parseDirectiveCFIRelOffset(IDLoc);
2365     case DK_CFI_PERSONALITY:
2366       return parseDirectiveCFIPersonalityOrLsda(true);
2367     case DK_CFI_LSDA:
2368       return parseDirectiveCFIPersonalityOrLsda(false);
2369     case DK_CFI_REMEMBER_STATE:
2370       return parseDirectiveCFIRememberState();
2371     case DK_CFI_RESTORE_STATE:
2372       return parseDirectiveCFIRestoreState();
2373     case DK_CFI_SAME_VALUE:
2374       return parseDirectiveCFISameValue(IDLoc);
2375     case DK_CFI_RESTORE:
2376       return parseDirectiveCFIRestore(IDLoc);
2377     case DK_CFI_ESCAPE:
2378       return parseDirectiveCFIEscape();
2379     case DK_CFI_RETURN_COLUMN:
2380       return parseDirectiveCFIReturnColumn(IDLoc);
2381     case DK_CFI_SIGNAL_FRAME:
2382       return parseDirectiveCFISignalFrame();
2383     case DK_CFI_UNDEFINED:
2384       return parseDirectiveCFIUndefined(IDLoc);
2385     case DK_CFI_REGISTER:
2386       return parseDirectiveCFIRegister(IDLoc);
2387     case DK_CFI_WINDOW_SAVE:
2388       return parseDirectiveCFIWindowSave();
2389     case DK_EXITM:
2390       Info.ExitValue = "";
2391       return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2392     case DK_ENDM:
2393       Info.ExitValue = "";
2394       return parseDirectiveEndMacro(IDVal);
2395     case DK_PURGE:
2396       return parseDirectivePurgeMacro(IDLoc);
2397     case DK_END:
2398       return parseDirectiveEnd(IDLoc);
2399     case DK_ERR:
2400       return parseDirectiveError(IDLoc);
2401     case DK_ERRB:
2402       return parseDirectiveErrorIfb(IDLoc, true);
2403     case DK_ERRNB:
2404       return parseDirectiveErrorIfb(IDLoc, false);
2405     case DK_ERRDEF:
2406       return parseDirectiveErrorIfdef(IDLoc, true);
2407     case DK_ERRNDEF:
2408       return parseDirectiveErrorIfdef(IDLoc, false);
2409     case DK_ERRDIF:
2410       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2411                                       /*CaseInsensitive=*/false);
2412     case DK_ERRDIFI:
2413       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2414                                       /*CaseInsensitive=*/true);
2415     case DK_ERRIDN:
2416       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2417                                       /*CaseInsensitive=*/false);
2418     case DK_ERRIDNI:
2419       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2420                                       /*CaseInsensitive=*/true);
2421     case DK_ERRE:
2422       return parseDirectiveErrorIfe(IDLoc, true);
2423     case DK_ERRNZ:
2424       return parseDirectiveErrorIfe(IDLoc, false);
2425     case DK_RADIX:
2426       return parseDirectiveRadix(IDLoc);
2427     }
2428 
2429     return Error(IDLoc, "unknown directive");
2430   }
2431 
2432   // We also check if this is allocating memory with user-defined type.
2433   auto IDIt = Structs.find(IDVal.lower());
2434   if (IDIt != Structs.end())
2435     return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2436                                      IDLoc);
2437 
2438   // Non-conditional Microsoft directives sometimes follow their first argument.
2439   const AsmToken nextTok = getTok();
2440   const StringRef nextVal = nextTok.getString();
2441   const SMLoc nextLoc = nextTok.getLoc();
2442 
2443   // There are several entities interested in parsing infix directives:
2444   //
2445   // 1. Asm parser extensions. For example, platform-specific parsers
2446   //    (like the ELF parser) register themselves as extensions.
2447   // 2. The generic directive parser implemented by this class. These are
2448   //    all the directives that behave in a target and platform independent
2449   //    manner, or at least have a default behavior that's shared between
2450   //    all targets and platforms.
2451 
2452   getTargetParser().flushPendingInstructions(getStreamer());
2453 
2454   // Special-case handling of structure-end directives at higher priority, since
2455   // ENDS is overloaded as a segment-end directive.
2456   if (nextVal.equals_lower("ends") && StructInProgress.size() == 1) {
2457     Lex();
2458     return parseDirectiveEnds(IDVal, IDLoc);
2459   }
2460 
2461   // First, check the extension directive map to see if any extension has
2462   // registered itself to parse this directive.
2463   std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2464       ExtensionDirectiveMap.lookup(nextVal.lower());
2465   if (Handler.first) {
2466     Lex();
2467     Lexer.UnLex(ID);
2468     return (*Handler.second)(Handler.first, nextVal, nextLoc);
2469   }
2470 
2471   // If no one else is interested in this directive, it must be
2472   // generic and familiar to this class.
2473   DirKindIt = DirectiveKindMap.find(nextVal.lower());
2474   DirKind = (DirKindIt == DirectiveKindMap.end())
2475                 ? DK_NO_DIRECTIVE
2476                 : DirKindIt->getValue();
2477   switch (DirKind) {
2478   default:
2479     break;
2480   case DK_ASSIGN:
2481   case DK_EQU:
2482   case DK_TEXTEQU:
2483     Lex();
2484     return parseDirectiveEquate(nextVal, IDVal, DirKind);
2485   case DK_BYTE:
2486   case DK_SBYTE:
2487   case DK_DB:
2488     Lex();
2489     return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2490   case DK_WORD:
2491   case DK_SWORD:
2492   case DK_DW:
2493     Lex();
2494     return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2495   case DK_DWORD:
2496   case DK_SDWORD:
2497   case DK_DD:
2498     Lex();
2499     return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2500   case DK_FWORD:
2501   case DK_DF:
2502     Lex();
2503     return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2504   case DK_QWORD:
2505   case DK_SQWORD:
2506   case DK_DQ:
2507     Lex();
2508     return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2509   case DK_REAL4:
2510     Lex();
2511     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2512                                         IDVal, IDLoc);
2513   case DK_REAL8:
2514     Lex();
2515     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2516                                         IDVal, IDLoc);
2517   case DK_REAL10:
2518     Lex();
2519     return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2520                                         10, IDVal, IDLoc);
2521   case DK_STRUCT:
2522   case DK_UNION:
2523     Lex();
2524     return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2525   case DK_ENDS:
2526     Lex();
2527     return parseDirectiveEnds(IDVal, IDLoc);
2528   case DK_MACRO:
2529     Lex();
2530     return parseDirectiveMacro(IDVal, IDLoc);
2531   }
2532 
2533   // Finally, we check if this is allocating a variable with user-defined type.
2534   auto NextIt = Structs.find(nextVal.lower());
2535   if (NextIt != Structs.end()) {
2536     Lex();
2537     return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2538                                           nextVal, nextLoc, IDVal);
2539   }
2540 
2541   // __asm _emit or __asm __emit
2542   if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2543                              IDVal == "_EMIT" || IDVal == "__EMIT"))
2544     return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2545 
2546   // __asm align
2547   if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2548     return parseDirectiveMSAlign(IDLoc, Info);
2549 
2550   if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2551     Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2552   if (checkForValidSection())
2553     return true;
2554 
2555   // Canonicalize the opcode to lower case.
2556   std::string OpcodeStr = IDVal.lower();
2557   ParseInstructionInfo IInfo(Info.AsmRewrites);
2558   bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2559                                                           Info.ParsedOperands);
2560   Info.ParseError = ParseHadError;
2561 
2562   // Dump the parsed representation, if requested.
2563   if (getShowParsedOperands()) {
2564     SmallString<256> Str;
2565     raw_svector_ostream OS(Str);
2566     OS << "parsed instruction: [";
2567     for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2568       if (i != 0)
2569         OS << ", ";
2570       Info.ParsedOperands[i]->print(OS);
2571     }
2572     OS << "]";
2573 
2574     printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2575   }
2576 
2577   // Fail even if ParseInstruction erroneously returns false.
2578   if (hasPendingError() || ParseHadError)
2579     return true;
2580 
2581   // If we are generating dwarf for the current section then generate a .loc
2582   // directive for the instruction.
2583   if (!ParseHadError && enabledGenDwarfForAssembly() &&
2584       getContext().getGenDwarfSectionSyms().count(
2585           getStreamer().getCurrentSectionOnly())) {
2586     unsigned Line;
2587     if (ActiveMacros.empty())
2588       Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2589     else
2590       Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2591                                    ActiveMacros.front()->ExitBuffer);
2592 
2593     // If we previously parsed a cpp hash file line comment then make sure the
2594     // current Dwarf File is for the CppHashFilename if not then emit the
2595     // Dwarf File table for it and adjust the line number for the .loc.
2596     if (!CppHashInfo.Filename.empty()) {
2597       unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2598           0, StringRef(), CppHashInfo.Filename);
2599       getContext().setGenDwarfFileNumber(FileNumber);
2600 
2601       unsigned CppHashLocLineNo =
2602         SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2603       Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2604     }
2605 
2606     getStreamer().emitDwarfLocDirective(
2607         getContext().getGenDwarfFileNumber(), Line, 0,
2608         DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2609         StringRef());
2610   }
2611 
2612   // If parsing succeeded, match the instruction.
2613   if (!ParseHadError) {
2614     uint64_t ErrorInfo;
2615     if (getTargetParser().MatchAndEmitInstruction(
2616             IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2617             getTargetParser().isParsingMSInlineAsm()))
2618       return true;
2619   }
2620   return false;
2621 }
2622 
2623 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2624 bool MasmParser::parseCurlyBlockScope(
2625     SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2626   // Identify curly brace marking block start/end.
2627   if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2628     return false;
2629 
2630   SMLoc StartLoc = Lexer.getLoc();
2631   Lex(); // Eat the brace.
2632   if (Lexer.is(AsmToken::EndOfStatement))
2633     Lex(); // Eat EndOfStatement following the brace.
2634 
2635   // Erase the block start/end brace from the output asm string.
2636   AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2637                                                   StartLoc.getPointer());
2638   return true;
2639 }
2640 
2641 /// parseCppHashLineFilenameComment as this:
2642 ///   ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2643 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2644   Lex(); // Eat the hash token.
2645   // Lexer only ever emits HashDirective if it fully formed if it's
2646   // done the checking already so this is an internal error.
2647   assert(getTok().is(AsmToken::Integer) &&
2648          "Lexing Cpp line comment: Expected Integer");
2649   int64_t LineNumber = getTok().getIntVal();
2650   Lex();
2651   assert(getTok().is(AsmToken::String) &&
2652          "Lexing Cpp line comment: Expected String");
2653   StringRef Filename = getTok().getString();
2654   Lex();
2655 
2656   // Get rid of the enclosing quotes.
2657   Filename = Filename.substr(1, Filename.size() - 2);
2658 
2659   // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2660   // and possibly DWARF file info.
2661   CppHashInfo.Loc = L;
2662   CppHashInfo.Filename = Filename;
2663   CppHashInfo.LineNumber = LineNumber;
2664   CppHashInfo.Buf = CurBuffer;
2665   if (FirstCppHashFilename.empty())
2666     FirstCppHashFilename = Filename;
2667   return false;
2668 }
2669 
2670 /// will use the last parsed cpp hash line filename comment
2671 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2672 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2673   const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2674   raw_ostream &OS = errs();
2675 
2676   const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2677   SMLoc DiagLoc = Diag.getLoc();
2678   unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2679   unsigned CppHashBuf =
2680       Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2681 
2682   // Like SourceMgr::printMessage() we need to print the include stack if any
2683   // before printing the message.
2684   unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2685   if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2686       DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2687     SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2688     DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2689   }
2690 
2691   // If we have not parsed a cpp hash line filename comment or the source
2692   // manager changed or buffer changed (like in a nested include) then just
2693   // print the normal diagnostic using its Filename and LineNo.
2694   if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2695       DiagBuf != CppHashBuf) {
2696     if (Parser->SavedDiagHandler)
2697       Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2698     else
2699       Diag.print(nullptr, OS);
2700     return;
2701   }
2702 
2703   // Use the CppHashFilename and calculate a line number based on the
2704   // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2705   // for the diagnostic.
2706   const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2707 
2708   int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2709   int CppHashLocLineNo =
2710       Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2711   int LineNo =
2712       Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2713 
2714   SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2715                        Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2716                        Diag.getLineContents(), Diag.getRanges());
2717 
2718   if (Parser->SavedDiagHandler)
2719     Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2720   else
2721     NewDiag.print(nullptr, OS);
2722 }
2723 
2724 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2725 // not accept '.'.
isMacroParameterChar(char C)2726 static bool isMacroParameterChar(char C) {
2727   return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2728 }
2729 
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2730 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2731                              ArrayRef<MCAsmMacroParameter> Parameters,
2732                              ArrayRef<MCAsmMacroArgument> A,
2733                              const std::vector<std::string> &Locals, SMLoc L) {
2734   unsigned NParameters = Parameters.size();
2735   if (NParameters != A.size())
2736     return Error(L, "Wrong number of arguments");
2737   StringMap<std::string> LocalSymbols;
2738   std::string Name;
2739   Name.reserve(6);
2740   for (StringRef Local : Locals) {
2741     raw_string_ostream LocalName(Name);
2742     LocalName << "??"
2743               << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2744     LocalSymbols.insert({Local, LocalName.str()});
2745     Name.clear();
2746   }
2747 
2748   Optional<char> CurrentQuote;
2749   while (!Body.empty()) {
2750     // Scan for the next substitution.
2751     std::size_t End = Body.size(), Pos = 0;
2752     std::size_t IdentifierPos = End;
2753     for (; Pos != End; ++Pos) {
2754       // Find the next possible macro parameter, including preceding a '&'
2755       // inside quotes.
2756       if (Body[Pos] == '&')
2757         break;
2758       if (isMacroParameterChar(Body[Pos])) {
2759         if (!CurrentQuote.hasValue())
2760           break;
2761         if (IdentifierPos == End)
2762           IdentifierPos = Pos;
2763       } else {
2764         IdentifierPos = End;
2765       }
2766 
2767       // Track quotation status
2768       if (!CurrentQuote.hasValue()) {
2769         if (Body[Pos] == '\'' || Body[Pos] == '"')
2770           CurrentQuote = Body[Pos];
2771       } else if (Body[Pos] == CurrentQuote) {
2772         if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2773           // Escaped quote, and quotes aren't identifier chars; skip
2774           ++Pos;
2775           continue;
2776         } else {
2777           CurrentQuote.reset();
2778         }
2779       }
2780     }
2781     if (IdentifierPos != End) {
2782       // We've recognized an identifier before an apostrophe inside quotes;
2783       // check once to see if we can expand it.
2784       Pos = IdentifierPos;
2785       IdentifierPos = End;
2786     }
2787 
2788     // Add the prefix.
2789     OS << Body.slice(0, Pos);
2790 
2791     // Check if we reached the end.
2792     if (Pos == End)
2793       break;
2794 
2795     unsigned I = Pos;
2796     bool InitialAmpersand = (Body[I] == '&');
2797     if (InitialAmpersand) {
2798       ++I;
2799       ++Pos;
2800     }
2801     while (I < End && isMacroParameterChar(Body[I]))
2802       ++I;
2803 
2804     const char *Begin = Body.data() + Pos;
2805     StringRef Argument(Begin, I - Pos);
2806     unsigned Index = 0;
2807 
2808     for (; Index < NParameters; ++Index)
2809       if (Parameters[Index].Name == Argument)
2810         break;
2811 
2812     if (Index == NParameters) {
2813       if (InitialAmpersand)
2814         OS << '&';
2815       auto it = LocalSymbols.find(Argument.lower());
2816       if (it != LocalSymbols.end())
2817         OS << it->second;
2818       else
2819         OS << Argument;
2820       Pos = I;
2821     } else {
2822       for (const AsmToken &Token : A[Index]) {
2823         // In MASM, you can write '%expr'.
2824         // The prefix '%' evaluates the expression 'expr'
2825         // and uses the result as a string (e.g. replace %(1+2) with the
2826         // string "3").
2827         // Here, we identify the integer token which is the result of the
2828         // absolute expression evaluation and replace it with its string
2829         // representation.
2830         if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2831           // Emit an integer value to the buffer.
2832           OS << Token.getIntVal();
2833         else
2834           OS << Token.getString();
2835       }
2836 
2837       Pos += Argument.size();
2838       if (Pos < End && Body[Pos] == '&') {
2839         ++Pos;
2840       }
2841     }
2842     // Update the scan point.
2843     Body = Body.substr(Pos);
2844   }
2845 
2846   return false;
2847 }
2848 
isOperator(AsmToken::TokenKind kind)2849 static bool isOperator(AsmToken::TokenKind kind) {
2850   switch (kind) {
2851   default:
2852     return false;
2853   case AsmToken::Plus:
2854   case AsmToken::Minus:
2855   case AsmToken::Tilde:
2856   case AsmToken::Slash:
2857   case AsmToken::Star:
2858   case AsmToken::Dot:
2859   case AsmToken::Equal:
2860   case AsmToken::EqualEqual:
2861   case AsmToken::Pipe:
2862   case AsmToken::PipePipe:
2863   case AsmToken::Caret:
2864   case AsmToken::Amp:
2865   case AsmToken::AmpAmp:
2866   case AsmToken::Exclaim:
2867   case AsmToken::ExclaimEqual:
2868   case AsmToken::Less:
2869   case AsmToken::LessEqual:
2870   case AsmToken::LessLess:
2871   case AsmToken::LessGreater:
2872   case AsmToken::Greater:
2873   case AsmToken::GreaterEqual:
2874   case AsmToken::GreaterGreater:
2875     return true;
2876   }
2877 }
2878 
2879 namespace {
2880 
2881 class AsmLexerSkipSpaceRAII {
2882 public:
AsmLexerSkipSpaceRAII(AsmLexer & Lexer,bool SkipSpace)2883   AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2884     Lexer.setSkipSpace(SkipSpace);
2885   }
2886 
~AsmLexerSkipSpaceRAII()2887   ~AsmLexerSkipSpaceRAII() {
2888     Lexer.setSkipSpace(true);
2889   }
2890 
2891 private:
2892   AsmLexer &Lexer;
2893 };
2894 
2895 } // end anonymous namespace
2896 
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)2897 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2898                                     MCAsmMacroArgument &MA,
2899                                     AsmToken::TokenKind EndTok) {
2900   if (MP && MP->Vararg) {
2901     if (Lexer.isNot(EndTok)) {
2902       SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
2903       for (StringRef S : Str) {
2904         MA.emplace_back(AsmToken::String, S);
2905       }
2906     }
2907     return false;
2908   }
2909 
2910   SMLoc StrLoc = Lexer.getLoc(), EndLoc;
2911   if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
2912     const char *StrChar = StrLoc.getPointer() + 1;
2913     const char *EndChar = EndLoc.getPointer() - 1;
2914     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
2915     /// Eat from '<' to '>'.
2916     Lex();
2917     MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
2918     return false;
2919   }
2920 
2921   unsigned ParenLevel = 0;
2922 
2923   // Darwin doesn't use spaces to delmit arguments.
2924   AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
2925 
2926   bool SpaceEaten;
2927 
2928   while (true) {
2929     SpaceEaten = false;
2930     if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
2931       return TokError("unexpected token");
2932 
2933     if (ParenLevel == 0) {
2934       if (Lexer.is(AsmToken::Comma))
2935         break;
2936 
2937       if (Lexer.is(AsmToken::Space)) {
2938         SpaceEaten = true;
2939         Lex(); // Eat spaces.
2940       }
2941 
2942       // Spaces can delimit parameters, but could also be part an expression.
2943       // If the token after a space is an operator, add the token and the next
2944       // one into this argument
2945       if (!IsDarwin) {
2946         if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
2947           MA.push_back(getTok());
2948           Lex();
2949 
2950           // Whitespace after an operator can be ignored.
2951           if (Lexer.is(AsmToken::Space))
2952             Lex();
2953 
2954           continue;
2955         }
2956       }
2957       if (SpaceEaten)
2958         break;
2959     }
2960 
2961     // handleMacroEntry relies on not advancing the lexer here
2962     // to be able to fill in the remaining default parameter values
2963     if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
2964       break;
2965 
2966     // Adjust the current parentheses level.
2967     if (Lexer.is(AsmToken::LParen))
2968       ++ParenLevel;
2969     else if (Lexer.is(AsmToken::RParen) && ParenLevel)
2970       --ParenLevel;
2971 
2972     // Append the token to the current argument list.
2973     MA.push_back(getTok());
2974     Lex();
2975   }
2976 
2977   if (ParenLevel != 0)
2978     return TokError("unbalanced parentheses in argument");
2979 
2980   if (MA.empty() && MP) {
2981     if (MP->Required) {
2982       return TokError("missing value for required parameter '" + MP->Name +
2983                       "'");
2984     } else {
2985       MA = MP->Value;
2986     }
2987   }
2988   return false;
2989 }
2990 
2991 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)2992 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
2993                                      MCAsmMacroArguments &A,
2994                                      AsmToken::TokenKind EndTok) {
2995   const unsigned NParameters = M ? M->Parameters.size() : 0;
2996   bool NamedParametersFound = false;
2997   SmallVector<SMLoc, 4> FALocs;
2998 
2999   A.resize(NParameters);
3000   FALocs.resize(NParameters);
3001 
3002   // Parse two kinds of macro invocations:
3003   // - macros defined without any parameters accept an arbitrary number of them
3004   // - macros defined with parameters accept at most that many of them
3005   for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3006        ++Parameter) {
3007     SMLoc IDLoc = Lexer.getLoc();
3008     MCAsmMacroParameter FA;
3009 
3010     if (Lexer.is(AsmToken::Identifier) && Lexer.peekTok().is(AsmToken::Equal)) {
3011       if (parseIdentifier(FA.Name))
3012         return Error(IDLoc, "invalid argument identifier for formal argument");
3013 
3014       if (Lexer.isNot(AsmToken::Equal))
3015         return TokError("expected '=' after formal parameter identifier");
3016 
3017       Lex();
3018 
3019       NamedParametersFound = true;
3020     }
3021 
3022     if (NamedParametersFound && FA.Name.empty())
3023       return Error(IDLoc, "cannot mix positional and keyword arguments");
3024 
3025     unsigned PI = Parameter;
3026     if (!FA.Name.empty()) {
3027       assert(M && "expected macro to be defined");
3028       unsigned FAI = 0;
3029       for (FAI = 0; FAI < NParameters; ++FAI)
3030         if (M->Parameters[FAI].Name == FA.Name)
3031           break;
3032 
3033       if (FAI >= NParameters) {
3034         return Error(IDLoc, "parameter named '" + FA.Name +
3035                                 "' does not exist for macro '" + M->Name + "'");
3036       }
3037       PI = FAI;
3038     }
3039     const MCAsmMacroParameter *MP = nullptr;
3040     if (M && PI < NParameters)
3041       MP = &M->Parameters[PI];
3042 
3043     SMLoc StrLoc = Lexer.getLoc();
3044     SMLoc EndLoc;
3045     if (Lexer.is(AsmToken::Percent)) {
3046       const MCExpr *AbsoluteExp;
3047       int64_t Value;
3048       /// Eat '%'.
3049       Lex();
3050       if (parseExpression(AbsoluteExp, EndLoc))
3051         return false;
3052       if (!AbsoluteExp->evaluateAsAbsolute(Value,
3053                                            getStreamer().getAssemblerPtr()))
3054         return Error(StrLoc, "expected absolute expression");
3055       const char *StrChar = StrLoc.getPointer();
3056       const char *EndChar = EndLoc.getPointer();
3057       AsmToken newToken(AsmToken::Integer,
3058                         StringRef(StrChar, EndChar - StrChar), Value);
3059       FA.Value.push_back(newToken);
3060     } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3061       if (M)
3062         return addErrorSuffix(" in '" + M->Name + "' macro");
3063       else
3064         return true;
3065     }
3066 
3067     if (!FA.Value.empty()) {
3068       if (A.size() <= PI)
3069         A.resize(PI + 1);
3070       A[PI] = FA.Value;
3071 
3072       if (FALocs.size() <= PI)
3073         FALocs.resize(PI + 1);
3074 
3075       FALocs[PI] = Lexer.getLoc();
3076     }
3077 
3078     // At the end of the statement, fill in remaining arguments that have
3079     // default values. If there aren't any, then the next argument is
3080     // required but missing
3081     if (Lexer.is(EndTok)) {
3082       bool Failure = false;
3083       for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3084         if (A[FAI].empty()) {
3085           if (M->Parameters[FAI].Required) {
3086             Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3087                   "missing value for required parameter "
3088                   "'" +
3089                       M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3090             Failure = true;
3091           }
3092 
3093           if (!M->Parameters[FAI].Value.empty())
3094             A[FAI] = M->Parameters[FAI].Value;
3095         }
3096       }
3097       return Failure;
3098     }
3099 
3100     if (Lexer.is(AsmToken::Comma))
3101       Lex();
3102   }
3103 
3104   return TokError("too many positional arguments");
3105 }
3106 
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)3107 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3108                                   AsmToken::TokenKind ArgumentEndTok) {
3109   // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3110   // eliminate this, although we should protect against infinite loops.
3111   unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3112   if (ActiveMacros.size() == MaxNestingDepth) {
3113     std::ostringstream MaxNestingDepthError;
3114     MaxNestingDepthError << "macros cannot be nested more than "
3115                          << MaxNestingDepth << " levels deep."
3116                          << " Use -asm-macro-max-nesting-depth to increase "
3117                             "this limit.";
3118     return TokError(MaxNestingDepthError.str());
3119   }
3120 
3121   MCAsmMacroArguments A;
3122   if (parseMacroArguments(M, A, ArgumentEndTok))
3123     return true;
3124 
3125   // Macro instantiation is lexical, unfortunately. We construct a new buffer
3126   // to hold the macro body with substitutions.
3127   SmallString<256> Buf;
3128   StringRef Body = M->Body;
3129   raw_svector_ostream OS(Buf);
3130 
3131   if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3132     return true;
3133 
3134   // We include the endm in the buffer as our cue to exit the macro
3135   // instantiation.
3136   OS << "endm\n";
3137 
3138   std::unique_ptr<MemoryBuffer> Instantiation =
3139       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3140 
3141   // Create the macro instantiation object and add to the current macro
3142   // instantiation stack.
3143   MacroInstantiation *MI = new MacroInstantiation{
3144       NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3145   ActiveMacros.push_back(MI);
3146 
3147   ++NumOfMacroInstantiations;
3148 
3149   // Jump to the macro instantiation and prime the lexer.
3150   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3151   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3152   EndStatementAtEOFStack.push_back(true);
3153   Lex();
3154 
3155   return false;
3156 }
3157 
handleMacroExit()3158 void MasmParser::handleMacroExit() {
3159   // Jump to the token we should return to, and consume it.
3160   EndStatementAtEOFStack.pop_back();
3161   jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3162             EndStatementAtEOFStack.back());
3163   Lex();
3164 
3165   // Pop the instantiation entry.
3166   delete ActiveMacros.back();
3167   ActiveMacros.pop_back();
3168 }
3169 
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)3170 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3171   if (!M->IsFunction)
3172     return Error(NameLoc, "cannot invoke macro procedure as function");
3173 
3174   if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3175                                        "' requires arguments in parentheses") ||
3176       handleMacroEntry(M, NameLoc, AsmToken::RParen))
3177     return true;
3178 
3179   // Parse all statements in the macro, retrieving the exit value when it ends.
3180   std::string ExitValue;
3181   SmallVector<AsmRewrite, 4> AsmStrRewrites;
3182   while (Lexer.isNot(AsmToken::Eof)) {
3183     ParseStatementInfo Info(&AsmStrRewrites);
3184     bool Parsed = parseStatement(Info, nullptr);
3185 
3186     if (!Parsed && Info.ExitValue.hasValue()) {
3187       ExitValue = std::move(*Info.ExitValue);
3188       break;
3189     }
3190 
3191     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3192     // for printing ErrMsg via Lex() only if no (presumably better) parser error
3193     // exists.
3194     if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3195       Lex();
3196     }
3197 
3198     // parseStatement returned true so may need to emit an error.
3199     printPendingErrors();
3200 
3201     // Skipping to the next line if needed.
3202     if (Parsed && !getLexer().isAtStartOfStatement())
3203       eatToEndOfStatement();
3204   }
3205 
3206   // Consume the right-parenthesis on the other side of the arguments.
3207   if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
3208                                        "' requires arguments in parentheses"))
3209     return true;
3210 
3211   // Exit values may require lexing, unfortunately. We construct a new buffer to
3212   // hold the exit value.
3213   std::unique_ptr<MemoryBuffer> MacroValue =
3214       MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3215 
3216   // Jump from this location to the instantiated exit value, and prime the
3217   // lexer.
3218   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3219   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3220                   /*EndStatementAtEOF=*/false);
3221   EndStatementAtEOFStack.push_back(false);
3222   Lex();
3223 
3224   return false;
3225 }
3226 
3227 /// parseIdentifier:
3228 ///   ::= identifier
3229 ///   ::= string
parseIdentifier(StringRef & Res)3230 bool MasmParser::parseIdentifier(StringRef &Res) {
3231   // The assembler has relaxed rules for accepting identifiers, in particular we
3232   // allow things like '.globl $foo' and '.def @feat.00', which would normally
3233   // be separate tokens. At this level, we have already lexed so we cannot
3234   // (currently) handle this as a context dependent token, instead we detect
3235   // adjacent tokens and return the combined identifier.
3236   if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3237     SMLoc PrefixLoc = getLexer().getLoc();
3238 
3239     // Consume the prefix character, and check for a following identifier.
3240 
3241     AsmToken Buf[1];
3242     Lexer.peekTokens(Buf, false);
3243 
3244     if (Buf[0].isNot(AsmToken::Identifier))
3245       return true;
3246 
3247     // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3248     if (PrefixLoc.getPointer() + 1 != Buf[0].getLoc().getPointer())
3249       return true;
3250 
3251     // eat $ or @
3252     Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3253     // Construct the joined identifier and consume the token.
3254     Res =
3255         StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3256     Lex(); // Parser Lex to maintain invariants.
3257     return false;
3258   }
3259 
3260   if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3261     return true;
3262 
3263   Res = getTok().getIdentifier();
3264 
3265   Lex(); // Consume the identifier token.
3266 
3267   return false;
3268 }
3269 
3270 /// parseDirectiveEquate:
3271 ///  ::= name "=" expression
3272 ///    | name "equ" expression    (not redefinable)
3273 ///    | name "equ" text-list
3274 ///    | name "textequ" text-list
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind)3275 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3276                                       DirectiveKind DirKind) {
3277   Variable &Var = Variables[Name.lower()];
3278   if (Var.Name.empty()) {
3279     Var.Name = Name;
3280   } else if (!Var.Redefinable) {
3281     return TokError("invalid variable redefinition");
3282   }
3283   Var.Redefinable = (DirKind != DK_EQU);
3284 
3285   SMLoc StartLoc = Lexer.getLoc();
3286   if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3287     // "equ" and "textequ" both allow text expressions.
3288     std::string Value;
3289     if (!parseTextItem(Value)) {
3290       Var.IsText = true;
3291       Var.TextValue = Value;
3292 
3293       // Accept a text-list, not just one text-item.
3294       auto parseItem = [&]() -> bool {
3295         if (parseTextItem(Value))
3296           return TokError("expected text item");
3297         Var.TextValue += Value;
3298         return false;
3299       };
3300       if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3301         return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3302 
3303       return false;
3304     }
3305   }
3306   if (DirKind == DK_TEXTEQU)
3307     return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3308 
3309   // Parse as expression assignment.
3310   const MCExpr *Expr;
3311   SMLoc EndLoc;
3312   if (parseExpression(Expr, EndLoc))
3313     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3314   MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3315   Sym->setRedefinable(Var.Redefinable);
3316   Sym->setVariableValue(Expr);
3317   Sym->setExternal(false);
3318 
3319   if (Expr->evaluateAsAbsolute(Var.NumericValue,
3320                                getStreamer().getAssemblerPtr()))
3321     return false;
3322 
3323   // Not an absolute expression; define as a text replacement.
3324   Var.IsText = true;
3325   Var.TextValue = StringRef(StartLoc.getPointer(),
3326                             EndLoc.getPointer() - StartLoc.getPointer()).str();
3327   return false;
3328 }
3329 
parseEscapedString(std::string & Data)3330 bool MasmParser::parseEscapedString(std::string &Data) {
3331   if (check(getTok().isNot(AsmToken::String), "expected string"))
3332     return true;
3333 
3334   Data = "";
3335   char Quote = getTok().getString().front();
3336   StringRef Str = getTok().getStringContents();
3337   Data.reserve(Str.size());
3338   for (size_t i = 0, e = Str.size(); i != e; ++i) {
3339     Data.push_back(Str[i]);
3340     if (Str[i] == Quote) {
3341       // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3342       // If we're escaping the string's trailing delimiter, we're definitely
3343       // missing a quotation mark.
3344       if (i + 1 == Str.size())
3345         return Error(getTok().getLoc(), "missing quotation mark in string");
3346       if (Str[i + 1] == Quote)
3347         ++i;
3348     }
3349   }
3350 
3351   Lex();
3352   return false;
3353 }
3354 
parseAngleBracketString(std::string & Data)3355 bool MasmParser::parseAngleBracketString(std::string &Data) {
3356   SMLoc EndLoc, StartLoc = getTok().getLoc();
3357   if (isAngleBracketString(StartLoc, EndLoc)) {
3358     const char *StartChar = StartLoc.getPointer() + 1;
3359     const char *EndChar = EndLoc.getPointer() - 1;
3360     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3361     // Eat from '<' to '>'.
3362     Lex();
3363 
3364     Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3365     return false;
3366   }
3367   return true;
3368 }
3369 
3370 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3371 bool MasmParser::parseTextItem(std::string &Data) {
3372   switch (getTok().getKind()) {
3373   default:
3374     return true;
3375   case AsmToken::Percent: {
3376     int64_t Res;
3377     if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3378       return true;
3379     Data = std::to_string(Res);
3380     return false;
3381   }
3382   case AsmToken::Less:
3383   case AsmToken::LessEqual:
3384   case AsmToken::LessLess:
3385   case AsmToken::LessGreater:
3386     return parseAngleBracketString(Data);
3387   case AsmToken::Identifier: {
3388     // This must be a text macro; we need to expand it accordingly.
3389     StringRef ID;
3390     if (parseIdentifier(ID))
3391       return true;
3392     Data = ID.str();
3393 
3394     auto it = Variables.find(ID.lower());
3395     if (it == Variables.end()) {
3396       // Not a variable; since we haven't used the token, put it back for better
3397       // error recovery.
3398       getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3399       return true;
3400     }
3401 
3402     while (it != Variables.end()) {
3403       const Variable &Var = it->second;
3404       if (!Var.IsText) {
3405         // Not a text macro; not usable in TextItem context. Since we haven't
3406         // used the token, put it back for better error recovery.
3407         getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3408         return true;
3409       }
3410       Data = Var.TextValue;
3411       it = Variables.find(StringRef(Data).lower());
3412     }
3413     return false;
3414   }
3415   }
3416   llvm_unreachable("unhandled token kind");
3417 }
3418 
3419 /// parseDirectiveAscii:
3420 ///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3421 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3422   auto parseOp = [&]() -> bool {
3423     std::string Data;
3424     if (checkForValidSection() || parseEscapedString(Data))
3425       return true;
3426     getStreamer().emitBytes(Data);
3427     if (ZeroTerminated)
3428       getStreamer().emitBytes(StringRef("\0", 1));
3429     return false;
3430   };
3431 
3432   if (parseMany(parseOp))
3433     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3434   return false;
3435 }
3436 
emitIntValue(const MCExpr * Value,unsigned Size)3437 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3438   // Special case constant expressions to match code generator.
3439   if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3440     assert(Size <= 8 && "Invalid size");
3441     int64_t IntValue = MCE->getValue();
3442     if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3443       return Error(MCE->getLoc(), "out of range literal value");
3444     getStreamer().emitIntValue(IntValue, Size);
3445   } else {
3446     const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3447     if (MSE && MSE->getSymbol().getName() == "?") {
3448       // ? initializer; treat as 0.
3449       getStreamer().emitIntValue(0, Size);
3450     } else {
3451       getStreamer().emitValue(Value, Size, Value->getLoc());
3452     }
3453   }
3454   return false;
3455 }
3456 
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3457 bool MasmParser::parseScalarInitializer(unsigned Size,
3458                                         SmallVectorImpl<const MCExpr *> &Values,
3459                                         unsigned StringPadLength) {
3460   if (Size == 1 && getTok().is(AsmToken::String)) {
3461     std::string Value;
3462     if (parseEscapedString(Value))
3463       return true;
3464     // Treat each character as an initializer.
3465     for (const unsigned char CharVal : Value)
3466       Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3467 
3468     // Pad the string with spaces to the specified length.
3469     for (size_t i = Value.size(); i < StringPadLength; ++i)
3470       Values.push_back(MCConstantExpr::create(' ', getContext()));
3471   } else {
3472     const MCExpr *Value;
3473     if (parseExpression(Value))
3474       return true;
3475     if (getTok().is(AsmToken::Identifier) &&
3476         getTok().getString().equals_lower("dup")) {
3477       Lex(); // Eat 'dup'.
3478       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3479       if (!MCE)
3480         return Error(Value->getLoc(),
3481                      "cannot repeat value a non-constant number of times");
3482       const int64_t Repetitions = MCE->getValue();
3483       if (Repetitions < 0)
3484         return Error(Value->getLoc(),
3485                      "cannot repeat value a negative number of times");
3486 
3487       SmallVector<const MCExpr *, 1> DuplicatedValues;
3488       if (parseToken(AsmToken::LParen,
3489                      "parentheses required for 'dup' contents") ||
3490           parseScalarInstList(Size, DuplicatedValues) ||
3491           parseToken(AsmToken::RParen, "unmatched parentheses"))
3492         return true;
3493 
3494       for (int i = 0; i < Repetitions; ++i)
3495         Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3496     } else {
3497       Values.push_back(Value);
3498     }
3499   }
3500   return false;
3501 }
3502 
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3503 bool MasmParser::parseScalarInstList(unsigned Size,
3504                                      SmallVectorImpl<const MCExpr *> &Values,
3505                                      const AsmToken::TokenKind EndToken) {
3506   while (getTok().isNot(EndToken) &&
3507          (EndToken != AsmToken::Greater ||
3508           getTok().isNot(AsmToken::GreaterGreater))) {
3509     parseScalarInitializer(Size, Values);
3510 
3511     // If we see a comma, continue, and allow line continuation.
3512     if (!parseOptionalToken(AsmToken::Comma))
3513       break;
3514     parseOptionalToken(AsmToken::EndOfStatement);
3515   }
3516   return false;
3517 }
3518 
emitIntegralValues(unsigned Size,unsigned * Count)3519 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3520   SmallVector<const MCExpr *, 1> Values;
3521   if (checkForValidSection() || parseScalarInstList(Size, Values))
3522     return true;
3523 
3524   for (auto Value : Values) {
3525     emitIntValue(Value, Size);
3526   }
3527   if (Count)
3528     *Count = Values.size();
3529   return false;
3530 }
3531 
3532 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3533 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3534   StructInfo &Struct = StructInProgress.back();
3535   FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3536   IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3537 
3538   Field.Type = Size;
3539 
3540   if (parseScalarInstList(Size, IntInfo.Values))
3541     return true;
3542 
3543   Field.SizeOf = Field.Type * IntInfo.Values.size();
3544   Field.LengthOf = IntInfo.Values.size();
3545   if (Struct.IsUnion)
3546     Struct.Size = std::max(Struct.Size, Field.SizeOf);
3547   else
3548     Struct.Size += Field.SizeOf;
3549   return false;
3550 }
3551 
3552 /// parseDirectiveValue
3553 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3554 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3555   if (StructInProgress.empty()) {
3556     // Initialize data value.
3557     if (emitIntegralValues(Size))
3558       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3559   } else if (addIntegralField("", Size)) {
3560     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3561   }
3562 
3563   return false;
3564 }
3565 
3566 /// parseDirectiveNamedValue
3567 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3568 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3569                                           StringRef Name, SMLoc NameLoc) {
3570   if (StructInProgress.empty()) {
3571     // Initialize named data value.
3572     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3573     getStreamer().emitLabel(Sym);
3574     unsigned Count;
3575     if (emitIntegralValues(Size, &Count))
3576       return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3577 
3578     AsmTypeInfo Type;
3579     Type.Name = TypeName;
3580     Type.Size = Size * Count;
3581     Type.ElementSize = Size;
3582     Type.Length = Count;
3583     KnownType[Name.lower()] = Type;
3584   } else if (addIntegralField(Name, Size)) {
3585     return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3586   }
3587 
3588   return false;
3589 }
3590 
parseHexOcta(MasmParser & Asm,uint64_t & hi,uint64_t & lo)3591 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3592   if (Asm.getTok().isNot(AsmToken::Integer) &&
3593       Asm.getTok().isNot(AsmToken::BigNum))
3594     return Asm.TokError("unknown token in expression");
3595   SMLoc ExprLoc = Asm.getTok().getLoc();
3596   APInt IntValue = Asm.getTok().getAPIntVal();
3597   Asm.Lex();
3598   if (!IntValue.isIntN(128))
3599     return Asm.Error(ExprLoc, "out of range literal value");
3600   if (!IntValue.isIntN(64)) {
3601     hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3602     lo = IntValue.getLoBits(64).getZExtValue();
3603   } else {
3604     hi = 0;
3605     lo = IntValue.getZExtValue();
3606   }
3607   return false;
3608 }
3609 
parseRealValue(const fltSemantics & Semantics,APInt & Res)3610 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3611   // We don't truly support arithmetic on floating point expressions, so we
3612   // have to manually parse unary prefixes.
3613   bool IsNeg = false;
3614   SMLoc SignLoc;
3615   if (getLexer().is(AsmToken::Minus)) {
3616     SignLoc = getLexer().getLoc();
3617     Lexer.Lex();
3618     IsNeg = true;
3619   } else if (getLexer().is(AsmToken::Plus)) {
3620     SignLoc = getLexer().getLoc();
3621     Lexer.Lex();
3622   }
3623 
3624   if (Lexer.is(AsmToken::Error))
3625     return TokError(Lexer.getErr());
3626   if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3627       Lexer.isNot(AsmToken::Identifier))
3628     return TokError("unexpected token in directive");
3629 
3630   // Convert to an APFloat.
3631   APFloat Value(Semantics);
3632   StringRef IDVal = getTok().getString();
3633   if (getLexer().is(AsmToken::Identifier)) {
3634     if (IDVal.equals_lower("infinity") || IDVal.equals_lower("inf"))
3635       Value = APFloat::getInf(Semantics);
3636     else if (IDVal.equals_lower("nan"))
3637       Value = APFloat::getNaN(Semantics, false, ~0);
3638     else if (IDVal.equals_lower("?"))
3639       Value = APFloat::getZero(Semantics);
3640     else
3641       return TokError("invalid floating point literal");
3642   } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3643     // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3644     // To match ML64.exe, ignore the initial sign.
3645     unsigned SizeInBits = Value.getSizeInBits(Semantics);
3646     if (SizeInBits != (IDVal.size() << 2))
3647       return TokError("invalid floating point literal");
3648 
3649     // Consume the numeric token.
3650     Lex();
3651 
3652     Res = APInt(SizeInBits, IDVal, 16);
3653     if (SignLoc.isValid())
3654       return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3655     return false;
3656   } else if (errorToBool(
3657                  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3658                      .takeError())) {
3659     return TokError("invalid floating point literal");
3660   }
3661   if (IsNeg)
3662     Value.changeSign();
3663 
3664   // Consume the numeric token.
3665   Lex();
3666 
3667   Res = Value.bitcastToAPInt();
3668 
3669   return false;
3670 }
3671 
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3672 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3673                                    SmallVectorImpl<APInt> &ValuesAsInt,
3674                                    const AsmToken::TokenKind EndToken) {
3675   while (getTok().isNot(EndToken) ||
3676          (EndToken == AsmToken::Greater &&
3677           getTok().isNot(AsmToken::GreaterGreater))) {
3678     const AsmToken NextTok = Lexer.peekTok();
3679     if (NextTok.is(AsmToken::Identifier) &&
3680         NextTok.getString().equals_lower("dup")) {
3681       const MCExpr *Value;
3682       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3683         return true;
3684       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3685       if (!MCE)
3686         return Error(Value->getLoc(),
3687                      "cannot repeat value a non-constant number of times");
3688       const int64_t Repetitions = MCE->getValue();
3689       if (Repetitions < 0)
3690         return Error(Value->getLoc(),
3691                      "cannot repeat value a negative number of times");
3692 
3693       SmallVector<APInt, 1> DuplicatedValues;
3694       if (parseToken(AsmToken::LParen,
3695                      "parentheses required for 'dup' contents") ||
3696           parseRealInstList(Semantics, DuplicatedValues) ||
3697           parseToken(AsmToken::RParen, "unmatched parentheses"))
3698         return true;
3699 
3700       for (int i = 0; i < Repetitions; ++i)
3701         ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3702     } else {
3703       APInt AsInt;
3704       if (parseRealValue(Semantics, AsInt))
3705         return true;
3706       ValuesAsInt.push_back(AsInt);
3707     }
3708 
3709     // Continue if we see a comma. (Also, allow line continuation.)
3710     if (!parseOptionalToken(AsmToken::Comma))
3711       break;
3712     parseOptionalToken(AsmToken::EndOfStatement);
3713   }
3714 
3715   return false;
3716 }
3717 
3718 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3719 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3720                                 unsigned *Count) {
3721   if (checkForValidSection())
3722     return true;
3723 
3724   SmallVector<APInt, 1> ValuesAsInt;
3725   if (parseRealInstList(Semantics, ValuesAsInt))
3726     return true;
3727 
3728   for (const APInt &AsInt : ValuesAsInt) {
3729     getStreamer().emitIntValue(AsInt);
3730   }
3731   if (Count)
3732     *Count = ValuesAsInt.size();
3733   return false;
3734 }
3735 
3736 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3737 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3738                               size_t Size) {
3739   StructInfo &Struct = StructInProgress.back();
3740   FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3741   RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3742 
3743   Field.SizeOf = 0;
3744 
3745   if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3746     return true;
3747 
3748   Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3749   Field.LengthOf = RealInfo.AsIntValues.size();
3750   Field.SizeOf = Field.Type * Field.LengthOf;
3751   if (Struct.IsUnion)
3752     Struct.Size = std::max(Struct.Size, Field.SizeOf);
3753   else
3754     Struct.Size += Field.SizeOf;
3755   return false;
3756 }
3757 
3758 /// parseDirectiveRealValue
3759 ///  ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3760 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3761                                          const fltSemantics &Semantics,
3762                                          size_t Size) {
3763   if (StructInProgress.empty()) {
3764     // Initialize data value.
3765     if (emitRealValues(Semantics))
3766       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3767   } else if (addRealField("", Semantics, Size)) {
3768     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3769   }
3770   return false;
3771 }
3772 
3773 /// parseDirectiveNamedRealValue
3774 ///  ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3775 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3776                                               const fltSemantics &Semantics,
3777                                               unsigned Size, StringRef Name,
3778                                               SMLoc NameLoc) {
3779   if (StructInProgress.empty()) {
3780     // Initialize named data value.
3781     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3782     getStreamer().emitLabel(Sym);
3783     unsigned Count;
3784     if (emitRealValues(Semantics, &Count))
3785       return addErrorSuffix(" in '" + TypeName + "' directive");
3786 
3787     AsmTypeInfo Type;
3788     Type.Name = TypeName;
3789     Type.Size = Size * Count;
3790     Type.ElementSize = Size;
3791     Type.Length = Count;
3792     KnownType[Name.lower()] = Type;
3793   } else if (addRealField(Name, Semantics, Size)) {
3794     return addErrorSuffix(" in '" + TypeName + "' directive");
3795   }
3796   return false;
3797 }
3798 
parseOptionalAngleBracketOpen()3799 bool MasmParser::parseOptionalAngleBracketOpen() {
3800   const AsmToken Tok = getTok();
3801   if (parseOptionalToken(AsmToken::LessLess)) {
3802     AngleBracketDepth++;
3803     Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
3804     return true;
3805   } else if (parseOptionalToken(AsmToken::LessGreater)) {
3806     AngleBracketDepth++;
3807     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3808     return true;
3809   } else if (parseOptionalToken(AsmToken::Less)) {
3810     AngleBracketDepth++;
3811     return true;
3812   }
3813 
3814   return false;
3815 }
3816 
parseAngleBracketClose(const Twine & Msg)3817 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
3818   const AsmToken Tok = getTok();
3819   if (parseOptionalToken(AsmToken::GreaterGreater)) {
3820     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3821   } else if (parseToken(AsmToken::Greater, Msg)) {
3822     return true;
3823   }
3824   AngleBracketDepth--;
3825   return false;
3826 }
3827 
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)3828 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3829                                        const IntFieldInfo &Contents,
3830                                        FieldInitializer &Initializer) {
3831   SMLoc Loc = getTok().getLoc();
3832 
3833   SmallVector<const MCExpr *, 1> Values;
3834   if (parseOptionalToken(AsmToken::LCurly)) {
3835     if (Field.LengthOf == 1 && Field.Type > 1)
3836       return Error(Loc, "Cannot initialize scalar field with array value");
3837     if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
3838         parseToken(AsmToken::RCurly))
3839       return true;
3840   } else if (parseOptionalAngleBracketOpen()) {
3841     if (Field.LengthOf == 1 && Field.Type > 1)
3842       return Error(Loc, "Cannot initialize scalar field with array value");
3843     if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
3844         parseAngleBracketClose())
3845       return true;
3846   } else if (Field.LengthOf > 1 && Field.Type > 1) {
3847     return Error(Loc, "Cannot initialize array field with scalar value");
3848   } else if (parseScalarInitializer(Field.Type, Values,
3849                                     /*StringPadLength=*/Field.LengthOf)) {
3850     return true;
3851   }
3852 
3853   if (Values.size() > Field.LengthOf) {
3854     return Error(Loc, "Initializer too long for field; expected at most " +
3855                           std::to_string(Field.LengthOf) + " elements, got " +
3856                           std::to_string(Values.size()));
3857   }
3858   // Default-initialize all remaining values.
3859   Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
3860 
3861   Initializer = FieldInitializer(std::move(Values));
3862   return false;
3863 }
3864 
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)3865 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3866                                        const RealFieldInfo &Contents,
3867                                        FieldInitializer &Initializer) {
3868   const fltSemantics *Semantics;
3869   switch (Field.Type) {
3870   case 4:
3871     Semantics = &APFloat::IEEEsingle();
3872     break;
3873   case 8:
3874     Semantics = &APFloat::IEEEdouble();
3875     break;
3876   case 10:
3877     Semantics = &APFloat::x87DoubleExtended();
3878     break;
3879   default:
3880     llvm_unreachable("unknown real field type");
3881   }
3882 
3883   SMLoc Loc = getTok().getLoc();
3884 
3885   SmallVector<APInt, 1> AsIntValues;
3886   if (parseOptionalToken(AsmToken::LCurly)) {
3887     if (Field.LengthOf == 1)
3888       return Error(Loc, "Cannot initialize scalar field with array value");
3889     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
3890         parseToken(AsmToken::RCurly))
3891       return true;
3892   } else if (parseOptionalAngleBracketOpen()) {
3893     if (Field.LengthOf == 1)
3894       return Error(Loc, "Cannot initialize scalar field with array value");
3895     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
3896         parseAngleBracketClose())
3897       return true;
3898   } else if (Field.LengthOf > 1) {
3899     return Error(Loc, "Cannot initialize array field with scalar value");
3900   } else {
3901     AsIntValues.emplace_back();
3902     if (parseRealValue(*Semantics, AsIntValues.back()))
3903       return true;
3904   }
3905 
3906   if (AsIntValues.size() > Field.LengthOf) {
3907     return Error(Loc, "Initializer too long for field; expected at most " +
3908                           std::to_string(Field.LengthOf) + " elements, got " +
3909                           std::to_string(AsIntValues.size()));
3910   }
3911   // Default-initialize all remaining values.
3912   AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
3913                      Contents.AsIntValues.end());
3914 
3915   Initializer = FieldInitializer(std::move(AsIntValues));
3916   return false;
3917 }
3918 
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)3919 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3920                                        const StructFieldInfo &Contents,
3921                                        FieldInitializer &Initializer) {
3922   SMLoc Loc = getTok().getLoc();
3923 
3924   std::vector<StructInitializer> Initializers;
3925   if (Field.LengthOf > 1) {
3926     if (parseOptionalToken(AsmToken::LCurly)) {
3927       if (parseStructInstList(Contents.Structure, Initializers,
3928                               AsmToken::RCurly) ||
3929           parseToken(AsmToken::RCurly))
3930         return true;
3931     } else if (parseOptionalAngleBracketOpen()) {
3932       if (parseStructInstList(Contents.Structure, Initializers,
3933                               AsmToken::Greater) ||
3934           parseAngleBracketClose())
3935         return true;
3936     } else {
3937       return Error(Loc, "Cannot initialize array field with scalar value");
3938     }
3939   } else {
3940     Initializers.emplace_back();
3941     if (parseStructInitializer(Contents.Structure, Initializers.back()))
3942       return true;
3943   }
3944 
3945   if (Initializers.size() > Field.LengthOf) {
3946     return Error(Loc, "Initializer too long for field; expected at most " +
3947                           std::to_string(Field.LengthOf) + " elements, got " +
3948                           std::to_string(Initializers.size()));
3949   }
3950   // Default-initialize all remaining values.
3951   Initializers.insert(Initializers.end(),
3952                       Contents.Initializers.begin() + Initializers.size(),
3953                       Contents.Initializers.end());
3954 
3955   Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
3956   return false;
3957 }
3958 
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)3959 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3960                                        FieldInitializer &Initializer) {
3961   switch (Field.Contents.FT) {
3962   case FT_INTEGRAL:
3963     return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
3964   case FT_REAL:
3965     return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
3966   case FT_STRUCT:
3967     return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
3968   }
3969   llvm_unreachable("Unhandled FieldType enum");
3970 }
3971 
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)3972 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
3973                                         StructInitializer &Initializer) {
3974   const AsmToken FirstToken = getTok();
3975 
3976   Optional<AsmToken::TokenKind> EndToken;
3977   if (parseOptionalToken(AsmToken::LCurly)) {
3978     EndToken = AsmToken::RCurly;
3979   } else if (parseOptionalAngleBracketOpen()) {
3980     EndToken = AsmToken::Greater;
3981     AngleBracketDepth++;
3982   } else if (FirstToken.is(AsmToken::Identifier) &&
3983              FirstToken.getString() == "?") {
3984     // ? initializer; leave EndToken uninitialized to treat as empty.
3985     if (parseToken(AsmToken::Identifier))
3986       return true;
3987   } else {
3988     return Error(FirstToken.getLoc(), "Expected struct initializer");
3989   }
3990 
3991   auto &FieldInitializers = Initializer.FieldInitializers;
3992   size_t FieldIndex = 0;
3993   if (EndToken.hasValue()) {
3994     // Initialize all fields with given initializers.
3995     while (getTok().isNot(EndToken.getValue()) &&
3996            FieldIndex < Structure.Fields.size()) {
3997       const FieldInfo &Field = Structure.Fields[FieldIndex++];
3998       if (parseOptionalToken(AsmToken::Comma)) {
3999         // Empty initializer; use the default and continue. (Also, allow line
4000         // continuation.)
4001         FieldInitializers.push_back(Field.Contents);
4002         parseOptionalToken(AsmToken::EndOfStatement);
4003         continue;
4004       }
4005       FieldInitializers.emplace_back(Field.Contents.FT);
4006       if (parseFieldInitializer(Field, FieldInitializers.back()))
4007         return true;
4008 
4009       // Continue if we see a comma. (Also, allow line continuation.)
4010       SMLoc CommaLoc = getTok().getLoc();
4011       if (!parseOptionalToken(AsmToken::Comma))
4012         break;
4013       if (FieldIndex == Structure.Fields.size())
4014         return Error(CommaLoc, "'" + Structure.Name +
4015                                    "' initializer initializes too many fields");
4016       parseOptionalToken(AsmToken::EndOfStatement);
4017     }
4018   }
4019   // Default-initialize all remaining fields.
4020   for (auto It = Structure.Fields.begin() + FieldIndex;
4021        It != Structure.Fields.end(); ++It) {
4022     const FieldInfo &Field = *It;
4023     FieldInitializers.push_back(Field.Contents);
4024   }
4025 
4026   if (EndToken.hasValue()) {
4027     if (EndToken.getValue() == AsmToken::Greater)
4028       return parseAngleBracketClose();
4029 
4030     return parseToken(EndToken.getValue());
4031   }
4032 
4033   return false;
4034 }
4035 
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)4036 bool MasmParser::parseStructInstList(
4037     const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4038     const AsmToken::TokenKind EndToken) {
4039   while (getTok().isNot(EndToken) ||
4040          (EndToken == AsmToken::Greater &&
4041           getTok().isNot(AsmToken::GreaterGreater))) {
4042     const AsmToken NextTok = Lexer.peekTok();
4043     if (NextTok.is(AsmToken::Identifier) &&
4044         NextTok.getString().equals_lower("dup")) {
4045       const MCExpr *Value;
4046       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4047         return true;
4048       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4049       if (!MCE)
4050         return Error(Value->getLoc(),
4051                      "cannot repeat value a non-constant number of times");
4052       const int64_t Repetitions = MCE->getValue();
4053       if (Repetitions < 0)
4054         return Error(Value->getLoc(),
4055                      "cannot repeat value a negative number of times");
4056 
4057       std::vector<StructInitializer> DuplicatedValues;
4058       if (parseToken(AsmToken::LParen,
4059                      "parentheses required for 'dup' contents") ||
4060           parseStructInstList(Structure, DuplicatedValues) ||
4061           parseToken(AsmToken::RParen, "unmatched parentheses"))
4062         return true;
4063 
4064       for (int i = 0; i < Repetitions; ++i)
4065         llvm::append_range(Initializers, DuplicatedValues);
4066     } else {
4067       Initializers.emplace_back();
4068       if (parseStructInitializer(Structure, Initializers.back()))
4069         return true;
4070     }
4071 
4072     // Continue if we see a comma. (Also, allow line continuation.)
4073     if (!parseOptionalToken(AsmToken::Comma))
4074       break;
4075     parseOptionalToken(AsmToken::EndOfStatement);
4076   }
4077 
4078   return false;
4079 }
4080 
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)4081 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4082                                 const IntFieldInfo &Contents) {
4083   // Default-initialize all values.
4084   for (const MCExpr *Value : Contents.Values) {
4085     if (emitIntValue(Value, Field.Type))
4086       return true;
4087   }
4088   return false;
4089 }
4090 
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)4091 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4092                                 const RealFieldInfo &Contents) {
4093   for (const APInt &AsInt : Contents.AsIntValues) {
4094     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4095                                AsInt.getBitWidth() / 8);
4096   }
4097   return false;
4098 }
4099 
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)4100 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4101                                 const StructFieldInfo &Contents) {
4102   for (const auto &Initializer : Contents.Initializers) {
4103     size_t Index = 0, Offset = 0;
4104     for (const auto &SubField : Contents.Structure.Fields) {
4105       getStreamer().emitZeros(SubField.Offset - Offset);
4106       Offset = SubField.Offset + SubField.SizeOf;
4107       emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4108     }
4109   }
4110   return false;
4111 }
4112 
emitFieldValue(const FieldInfo & Field)4113 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4114   switch (Field.Contents.FT) {
4115   case FT_INTEGRAL:
4116     return emitFieldValue(Field, Field.Contents.IntInfo);
4117   case FT_REAL:
4118     return emitFieldValue(Field, Field.Contents.RealInfo);
4119   case FT_STRUCT:
4120     return emitFieldValue(Field, Field.Contents.StructInfo);
4121   }
4122   llvm_unreachable("Unhandled FieldType enum");
4123 }
4124 
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)4125 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4126                                       const IntFieldInfo &Contents,
4127                                       const IntFieldInfo &Initializer) {
4128   for (const auto &Value : Initializer.Values) {
4129     if (emitIntValue(Value, Field.Type))
4130       return true;
4131   }
4132   // Default-initialize all remaining values.
4133   for (auto it = Contents.Values.begin() + Initializer.Values.size();
4134        it != Contents.Values.end(); ++it) {
4135     const auto &Value = *it;
4136     if (emitIntValue(Value, Field.Type))
4137       return true;
4138   }
4139   return false;
4140 }
4141 
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)4142 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4143                                       const RealFieldInfo &Contents,
4144                                       const RealFieldInfo &Initializer) {
4145   for (const auto &AsInt : Initializer.AsIntValues) {
4146     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4147                                AsInt.getBitWidth() / 8);
4148   }
4149   // Default-initialize all remaining values.
4150   for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
4151        It != Contents.AsIntValues.end(); ++It) {
4152     const auto &AsInt = *It;
4153     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4154                                AsInt.getBitWidth() / 8);
4155   }
4156   return false;
4157 }
4158 
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)4159 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4160                                       const StructFieldInfo &Contents,
4161                                       const StructFieldInfo &Initializer) {
4162   for (const auto &Init : Initializer.Initializers) {
4163     emitStructInitializer(Contents.Structure, Init);
4164   }
4165   // Default-initialize all remaining values.
4166   for (auto It =
4167            Contents.Initializers.begin() + Initializer.Initializers.size();
4168        It != Contents.Initializers.end(); ++It) {
4169     const auto &Init = *It;
4170     emitStructInitializer(Contents.Structure, Init);
4171   }
4172   return false;
4173 }
4174 
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)4175 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4176                                       const FieldInitializer &Initializer) {
4177   switch (Field.Contents.FT) {
4178   case FT_INTEGRAL:
4179     return emitFieldInitializer(Field, Field.Contents.IntInfo,
4180                                 Initializer.IntInfo);
4181   case FT_REAL:
4182     return emitFieldInitializer(Field, Field.Contents.RealInfo,
4183                                 Initializer.RealInfo);
4184   case FT_STRUCT:
4185     return emitFieldInitializer(Field, Field.Contents.StructInfo,
4186                                 Initializer.StructInfo);
4187   }
4188   llvm_unreachable("Unhandled FieldType enum");
4189 }
4190 
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)4191 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4192                                        const StructInitializer &Initializer) {
4193   size_t Index = 0, Offset = 0;
4194   for (const auto &Init : Initializer.FieldInitializers) {
4195     const auto &Field = Structure.Fields[Index++];
4196     getStreamer().emitZeros(Field.Offset - Offset);
4197     Offset = Field.Offset + Field.SizeOf;
4198     if (emitFieldInitializer(Field, Init))
4199       return true;
4200   }
4201   // Default-initialize all remaining fields.
4202   for (auto It =
4203            Structure.Fields.begin() + Initializer.FieldInitializers.size();
4204        It != Structure.Fields.end(); ++It) {
4205     const auto &Field = *It;
4206     getStreamer().emitZeros(Field.Offset - Offset);
4207     Offset = Field.Offset + Field.SizeOf;
4208     if (emitFieldValue(Field))
4209       return true;
4210   }
4211   // Add final padding.
4212   if (Offset != Structure.Size)
4213     getStreamer().emitZeros(Structure.Size - Offset);
4214   return false;
4215 }
4216 
4217 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)4218 bool MasmParser::emitStructValues(const StructInfo &Structure,
4219                                   unsigned *Count) {
4220   std::vector<StructInitializer> Initializers;
4221   if (parseStructInstList(Structure, Initializers))
4222     return true;
4223 
4224   for (const auto &Initializer : Initializers) {
4225     if (emitStructInitializer(Structure, Initializer))
4226       return true;
4227   }
4228 
4229   if (Count)
4230     *Count = Initializers.size();
4231   return false;
4232 }
4233 
4234 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)4235 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4236   StructInfo &OwningStruct = StructInProgress.back();
4237   FieldInfo &Field =
4238       OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4239   StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4240 
4241   StructInfo.Structure = Structure;
4242   Field.Type = Structure.Size;
4243 
4244   if (parseStructInstList(Structure, StructInfo.Initializers))
4245     return true;
4246 
4247   Field.LengthOf = StructInfo.Initializers.size();
4248   Field.SizeOf = Field.Type * Field.LengthOf;
4249   if (OwningStruct.IsUnion)
4250     OwningStruct.Size = std::max(OwningStruct.Size, Field.SizeOf);
4251   else
4252     OwningStruct.Size += Field.SizeOf;
4253 
4254   return false;
4255 }
4256 
4257 /// parseDirectiveStructValue
4258 ///  ::= struct-id (<struct-initializer> | {struct-initializer})
4259 ///                [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)4260 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4261                                            StringRef Directive, SMLoc DirLoc) {
4262   if (StructInProgress.empty()) {
4263     if (emitStructValues(Structure))
4264       return true;
4265   } else if (addStructField("", Structure)) {
4266     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4267   }
4268 
4269   return false;
4270 }
4271 
4272 /// parseDirectiveNamedValue
4273 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4274 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4275                                                 StringRef Directive,
4276                                                 SMLoc DirLoc, StringRef Name) {
4277   if (StructInProgress.empty()) {
4278     // Initialize named data value.
4279     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4280     getStreamer().emitLabel(Sym);
4281     unsigned Count;
4282     if (emitStructValues(Structure, &Count))
4283       return true;
4284     AsmTypeInfo Type;
4285     Type.Name = Structure.Name;
4286     Type.Size = Structure.Size * Count;
4287     Type.ElementSize = Structure.Size;
4288     Type.Length = Count;
4289     KnownType[Name.lower()] = Type;
4290   } else if (addStructField(Name, Structure)) {
4291     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4292   }
4293 
4294   return false;
4295 }
4296 
4297 /// parseDirectiveStruct
4298 ///  ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4299 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4300 ///      <name> ENDS
4301 ////// dataDir = data declaration
4302 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4303 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4304                                       DirectiveKind DirKind, StringRef Name,
4305                                       SMLoc NameLoc) {
4306   // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4307   // anyway, so all field accesses must be qualified.
4308   AsmToken NextTok = getTok();
4309   int64_t AlignmentValue = 1;
4310   if (NextTok.isNot(AsmToken::Comma) &&
4311       NextTok.isNot(AsmToken::EndOfStatement) &&
4312       parseAbsoluteExpression(AlignmentValue)) {
4313     return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4314                           "' directive");
4315   }
4316   if (!isPowerOf2_64(AlignmentValue)) {
4317     return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4318                                        std::to_string(AlignmentValue));
4319   }
4320 
4321   StringRef Qualifier;
4322   SMLoc QualifierLoc;
4323   if (parseOptionalToken(AsmToken::Comma)) {
4324     QualifierLoc = getTok().getLoc();
4325     if (parseIdentifier(Qualifier))
4326       return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4327     if (!Qualifier.equals_lower("nonunique"))
4328       return Error(QualifierLoc, "Unrecognized qualifier for '" +
4329                                      Twine(Directive) +
4330                                      "' directive; expected none or NONUNIQUE");
4331   }
4332 
4333   if (parseToken(AsmToken::EndOfStatement))
4334     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4335 
4336   StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4337   return false;
4338 }
4339 
4340 /// parseDirectiveNestedStruct
4341 ///  ::= (STRUC | STRUCT | UNION) [name]
4342 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4343 ///      ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4344 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4345                                             DirectiveKind DirKind) {
4346   if (StructInProgress.empty())
4347     return TokError("missing name in top-level '" + Twine(Directive) +
4348                     "' directive");
4349 
4350   StringRef Name;
4351   if (getTok().is(AsmToken::Identifier)) {
4352     Name = getTok().getIdentifier();
4353     parseToken(AsmToken::Identifier);
4354   }
4355   if (parseToken(AsmToken::EndOfStatement))
4356     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4357 
4358   // Reserve space to ensure Alignment doesn't get invalidated when
4359   // StructInProgress grows.
4360   StructInProgress.reserve(StructInProgress.size() + 1);
4361   StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4362                                 StructInProgress.back().Alignment);
4363   return false;
4364 }
4365 
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4366 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4367   if (StructInProgress.empty())
4368     return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4369   if (StructInProgress.size() > 1)
4370     return Error(NameLoc, "unexpected name in nested ENDS directive");
4371   if (StructInProgress.back().Name.compare_lower(Name))
4372     return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4373                               StructInProgress.back().Name + "'");
4374   StructInfo Structure = StructInProgress.pop_back_val();
4375   // Pad to make the structure's size divisible by the smaller of its alignment
4376   // and the size of its largest field.
4377   Structure.Size = llvm::alignTo(
4378       Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4379   Structs[Name.lower()] = Structure;
4380 
4381   if (parseToken(AsmToken::EndOfStatement))
4382     return addErrorSuffix(" in ENDS directive");
4383 
4384   return false;
4385 }
4386 
parseDirectiveNestedEnds()4387 bool MasmParser::parseDirectiveNestedEnds() {
4388   if (StructInProgress.empty())
4389     return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4390   if (StructInProgress.size() == 1)
4391     return TokError("missing name in top-level ENDS directive");
4392 
4393   if (parseToken(AsmToken::EndOfStatement))
4394     return addErrorSuffix(" in nested ENDS directive");
4395 
4396   StructInfo Structure = StructInProgress.pop_back_val();
4397   // Pad to make the structure's size divisible by its alignment.
4398   Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4399 
4400   StructInfo &ParentStruct = StructInProgress.back();
4401   if (Structure.Name.empty()) {
4402     const size_t OldFields = ParentStruct.Fields.size();
4403     ParentStruct.Fields.insert(
4404         ParentStruct.Fields.end(),
4405         std::make_move_iterator(Structure.Fields.begin()),
4406         std::make_move_iterator(Structure.Fields.end()));
4407     for (const auto &FieldByName : Structure.FieldsByName) {
4408       ParentStruct.FieldsByName[FieldByName.getKey()] =
4409           FieldByName.getValue() + OldFields;
4410     }
4411     if (!ParentStruct.IsUnion) {
4412       for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
4413            FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
4414         FieldIter->Offset += ParentStruct.Size;
4415       }
4416     }
4417 
4418     if (ParentStruct.IsUnion)
4419       ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4420     else
4421       ParentStruct.Size += Structure.Size;
4422   } else {
4423     FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4424                                              Structure.AlignmentSize);
4425     StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4426     Field.Type = Structure.Size;
4427     Field.LengthOf = 1;
4428     Field.SizeOf = Structure.Size;
4429 
4430     if (ParentStruct.IsUnion)
4431       ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf);
4432     else
4433       ParentStruct.Size += Field.SizeOf;
4434 
4435     StructInfo.Structure = Structure;
4436     StructInfo.Initializers.emplace_back();
4437     auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4438     for (const auto &SubField : Structure.Fields) {
4439       FieldInitializers.push_back(SubField.Contents);
4440     }
4441   }
4442 
4443   return false;
4444 }
4445 
4446 /// parseDirectiveOrg
4447 ///  ::= .org expression [ , expression ]
parseDirectiveOrg()4448 bool MasmParser::parseDirectiveOrg() {
4449   const MCExpr *Offset;
4450   SMLoc OffsetLoc = Lexer.getLoc();
4451   if (checkForValidSection() || parseExpression(Offset))
4452     return true;
4453 
4454   // Parse optional fill expression.
4455   int64_t FillExpr = 0;
4456   if (parseOptionalToken(AsmToken::Comma))
4457     if (parseAbsoluteExpression(FillExpr))
4458       return addErrorSuffix(" in '.org' directive");
4459   if (parseToken(AsmToken::EndOfStatement))
4460     return addErrorSuffix(" in '.org' directive");
4461 
4462   getStreamer().emitValueToOffset(Offset, FillExpr, OffsetLoc);
4463   return false;
4464 }
4465 
4466 /// parseDirectiveAlign
4467 ///  ::= align expression
parseDirectiveAlign()4468 bool MasmParser::parseDirectiveAlign() {
4469   SMLoc AlignmentLoc = getLexer().getLoc();
4470   int64_t Alignment;
4471 
4472   if (checkForValidSection())
4473     return addErrorSuffix(" in align directive");
4474   // Ignore empty 'align' directives.
4475   if (getTok().is(AsmToken::EndOfStatement)) {
4476     return Warning(AlignmentLoc,
4477                    "align directive with no operand is ignored") &&
4478            parseToken(AsmToken::EndOfStatement);
4479   }
4480   if (parseAbsoluteExpression(Alignment) ||
4481       parseToken(AsmToken::EndOfStatement))
4482     return addErrorSuffix(" in align directive");
4483 
4484   // Always emit an alignment here even if we thrown an error.
4485   bool ReturnVal = false;
4486 
4487   // Reject alignments that aren't either a power of two or zero, for gas
4488   // compatibility. Alignment of zero is silently rounded up to one.
4489   if (Alignment == 0)
4490     Alignment = 1;
4491   if (!isPowerOf2_64(Alignment))
4492     ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
4493 
4494   // Check whether we should use optimal code alignment for this align
4495   // directive.
4496   const MCSection *Section = getStreamer().getCurrentSectionOnly();
4497   assert(Section && "must have section to emit alignment");
4498   if (Section->UseCodeAlign()) {
4499     getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0);
4500   } else {
4501     // FIXME: Target specific behavior about how the "extra" bytes are filled.
4502     getStreamer().emitValueToAlignment(Alignment, /*Value=*/0, /*ValueSize=*/1,
4503                                        /*MaxBytesToEmit=*/0);
4504   }
4505 
4506   return ReturnVal;
4507 }
4508 
4509 /// parseDirectiveFile
4510 /// ::= .file filename
4511 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
parseDirectiveFile(SMLoc DirectiveLoc)4512 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4513   // FIXME: I'm not sure what this is.
4514   int64_t FileNumber = -1;
4515   if (getLexer().is(AsmToken::Integer)) {
4516     FileNumber = getTok().getIntVal();
4517     Lex();
4518 
4519     if (FileNumber < 0)
4520       return TokError("negative file number");
4521   }
4522 
4523   std::string Path;
4524 
4525   // Usually the directory and filename together, otherwise just the directory.
4526   // Allow the strings to have escaped octal character sequence.
4527   if (check(getTok().isNot(AsmToken::String),
4528             "unexpected token in '.file' directive") ||
4529       parseEscapedString(Path))
4530     return true;
4531 
4532   StringRef Directory;
4533   StringRef Filename;
4534   std::string FilenameData;
4535   if (getLexer().is(AsmToken::String)) {
4536     if (check(FileNumber == -1,
4537               "explicit path specified, but no file number") ||
4538         parseEscapedString(FilenameData))
4539       return true;
4540     Filename = FilenameData;
4541     Directory = Path;
4542   } else {
4543     Filename = Path;
4544   }
4545 
4546   uint64_t MD5Hi, MD5Lo;
4547   bool HasMD5 = false;
4548 
4549   Optional<StringRef> Source;
4550   bool HasSource = false;
4551   std::string SourceString;
4552 
4553   while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4554     StringRef Keyword;
4555     if (check(getTok().isNot(AsmToken::Identifier),
4556               "unexpected token in '.file' directive") ||
4557         parseIdentifier(Keyword))
4558       return true;
4559     if (Keyword == "md5") {
4560       HasMD5 = true;
4561       if (check(FileNumber == -1,
4562                 "MD5 checksum specified, but no file number") ||
4563           parseHexOcta(*this, MD5Hi, MD5Lo))
4564         return true;
4565     } else if (Keyword == "source") {
4566       HasSource = true;
4567       if (check(FileNumber == -1,
4568                 "source specified, but no file number") ||
4569           check(getTok().isNot(AsmToken::String),
4570                 "unexpected token in '.file' directive") ||
4571           parseEscapedString(SourceString))
4572         return true;
4573     } else {
4574       return TokError("unexpected token in '.file' directive");
4575     }
4576   }
4577 
4578   if (FileNumber == -1) {
4579     // Ignore the directive if there is no number and the target doesn't support
4580     // numberless .file directives. This allows some portability of assembler
4581     // between different object file formats.
4582     if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4583       getStreamer().emitFileDirective(Filename);
4584   } else {
4585     // In case there is a -g option as well as debug info from directive .file,
4586     // we turn off the -g option, directly use the existing debug info instead.
4587     // Throw away any implicit file table for the assembler source.
4588     if (Ctx.getGenDwarfForAssembly()) {
4589       Ctx.getMCDwarfLineTable(0).resetFileTable();
4590       Ctx.setGenDwarfForAssembly(false);
4591     }
4592 
4593     Optional<MD5::MD5Result> CKMem;
4594     if (HasMD5) {
4595       MD5::MD5Result Sum;
4596       for (unsigned i = 0; i != 8; ++i) {
4597         Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4598         Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4599       }
4600       CKMem = Sum;
4601     }
4602     if (HasSource) {
4603       char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4604       memcpy(SourceBuf, SourceString.data(), SourceString.size());
4605       Source = StringRef(SourceBuf, SourceString.size());
4606     }
4607     if (FileNumber == 0) {
4608       if (Ctx.getDwarfVersion() < 5)
4609         return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4610       getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4611     } else {
4612       Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4613           FileNumber, Directory, Filename, CKMem, Source);
4614       if (!FileNumOrErr)
4615         return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4616     }
4617     // Alert the user if there are some .file directives with MD5 and some not.
4618     // But only do that once.
4619     if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4620       ReportedInconsistentMD5 = true;
4621       return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4622     }
4623   }
4624 
4625   return false;
4626 }
4627 
4628 /// parseDirectiveLine
4629 /// ::= .line [number]
parseDirectiveLine()4630 bool MasmParser::parseDirectiveLine() {
4631   int64_t LineNumber;
4632   if (getLexer().is(AsmToken::Integer)) {
4633     if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4634       return true;
4635     (void)LineNumber;
4636     // FIXME: Do something with the .line.
4637   }
4638   if (parseToken(AsmToken::EndOfStatement,
4639                  "unexpected token in '.line' directive"))
4640     return true;
4641 
4642   return false;
4643 }
4644 
4645 /// parseDirectiveLoc
4646 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4647 ///                                [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4648 /// The first number is a file number, must have been previously assigned with
4649 /// a .file directive, the second number is the line number and optionally the
4650 /// third number is a column position (zero if not specified).  The remaining
4651 /// optional items are .loc sub-directives.
parseDirectiveLoc()4652 bool MasmParser::parseDirectiveLoc() {
4653   int64_t FileNumber = 0, LineNumber = 0;
4654   SMLoc Loc = getTok().getLoc();
4655   if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4656       check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4657             "file number less than one in '.loc' directive") ||
4658       check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4659             "unassigned file number in '.loc' directive"))
4660     return true;
4661 
4662   // optional
4663   if (getLexer().is(AsmToken::Integer)) {
4664     LineNumber = getTok().getIntVal();
4665     if (LineNumber < 0)
4666       return TokError("line number less than zero in '.loc' directive");
4667     Lex();
4668   }
4669 
4670   int64_t ColumnPos = 0;
4671   if (getLexer().is(AsmToken::Integer)) {
4672     ColumnPos = getTok().getIntVal();
4673     if (ColumnPos < 0)
4674       return TokError("column position less than zero in '.loc' directive");
4675     Lex();
4676   }
4677 
4678   auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4679   unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4680   unsigned Isa = 0;
4681   int64_t Discriminator = 0;
4682 
4683   auto parseLocOp = [&]() -> bool {
4684     StringRef Name;
4685     SMLoc Loc = getTok().getLoc();
4686     if (parseIdentifier(Name))
4687       return TokError("unexpected token in '.loc' directive");
4688 
4689     if (Name == "basic_block")
4690       Flags |= DWARF2_FLAG_BASIC_BLOCK;
4691     else if (Name == "prologue_end")
4692       Flags |= DWARF2_FLAG_PROLOGUE_END;
4693     else if (Name == "epilogue_begin")
4694       Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4695     else if (Name == "is_stmt") {
4696       Loc = getTok().getLoc();
4697       const MCExpr *Value;
4698       if (parseExpression(Value))
4699         return true;
4700       // The expression must be the constant 0 or 1.
4701       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4702         int Value = MCE->getValue();
4703         if (Value == 0)
4704           Flags &= ~DWARF2_FLAG_IS_STMT;
4705         else if (Value == 1)
4706           Flags |= DWARF2_FLAG_IS_STMT;
4707         else
4708           return Error(Loc, "is_stmt value not 0 or 1");
4709       } else {
4710         return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4711       }
4712     } else if (Name == "isa") {
4713       Loc = getTok().getLoc();
4714       const MCExpr *Value;
4715       if (parseExpression(Value))
4716         return true;
4717       // The expression must be a constant greater or equal to 0.
4718       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4719         int Value = MCE->getValue();
4720         if (Value < 0)
4721           return Error(Loc, "isa number less than zero");
4722         Isa = Value;
4723       } else {
4724         return Error(Loc, "isa number not a constant value");
4725       }
4726     } else if (Name == "discriminator") {
4727       if (parseAbsoluteExpression(Discriminator))
4728         return true;
4729     } else {
4730       return Error(Loc, "unknown sub-directive in '.loc' directive");
4731     }
4732     return false;
4733   };
4734 
4735   if (parseMany(parseLocOp, false /*hasComma*/))
4736     return true;
4737 
4738   getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4739                                       Isa, Discriminator, StringRef());
4740 
4741   return false;
4742 }
4743 
4744 /// parseDirectiveStabs
4745 /// ::= .stabs string, number, number, number
parseDirectiveStabs()4746 bool MasmParser::parseDirectiveStabs() {
4747   return TokError("unsupported directive '.stabs'");
4748 }
4749 
4750 /// parseDirectiveCVFile
4751 /// ::= .cv_file number filename [checksum] [checksumkind]
parseDirectiveCVFile()4752 bool MasmParser::parseDirectiveCVFile() {
4753   SMLoc FileNumberLoc = getTok().getLoc();
4754   int64_t FileNumber;
4755   std::string Filename;
4756   std::string Checksum;
4757   int64_t ChecksumKind = 0;
4758 
4759   if (parseIntToken(FileNumber,
4760                     "expected file number in '.cv_file' directive") ||
4761       check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
4762       check(getTok().isNot(AsmToken::String),
4763             "unexpected token in '.cv_file' directive") ||
4764       parseEscapedString(Filename))
4765     return true;
4766   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
4767     if (check(getTok().isNot(AsmToken::String),
4768               "unexpected token in '.cv_file' directive") ||
4769         parseEscapedString(Checksum) ||
4770         parseIntToken(ChecksumKind,
4771                       "expected checksum kind in '.cv_file' directive") ||
4772         parseToken(AsmToken::EndOfStatement,
4773                    "unexpected token in '.cv_file' directive"))
4774       return true;
4775   }
4776 
4777   Checksum = fromHex(Checksum);
4778   void *CKMem = Ctx.allocate(Checksum.size(), 1);
4779   memcpy(CKMem, Checksum.data(), Checksum.size());
4780   ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
4781                                     Checksum.size());
4782 
4783   if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
4784                                          static_cast<uint8_t>(ChecksumKind)))
4785     return Error(FileNumberLoc, "file number already allocated");
4786 
4787   return false;
4788 }
4789 
parseCVFunctionId(int64_t & FunctionId,StringRef DirectiveName)4790 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
4791                                    StringRef DirectiveName) {
4792   SMLoc Loc;
4793   return parseTokenLoc(Loc) ||
4794          parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
4795                                        "' directive") ||
4796          check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
4797                "expected function id within range [0, UINT_MAX)");
4798 }
4799 
parseCVFileId(int64_t & FileNumber,StringRef DirectiveName)4800 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
4801   SMLoc Loc;
4802   return parseTokenLoc(Loc) ||
4803          parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
4804                                        "' directive") ||
4805          check(FileNumber < 1, Loc, "file number less than one in '" +
4806                                         DirectiveName + "' directive") ||
4807          check(!getCVContext().isValidFileNumber(FileNumber), Loc,
4808                "unassigned file number in '" + DirectiveName + "' directive");
4809 }
4810 
4811 /// parseDirectiveCVFuncId
4812 /// ::= .cv_func_id FunctionId
4813 ///
4814 /// Introduces a function ID that can be used with .cv_loc.
parseDirectiveCVFuncId()4815 bool MasmParser::parseDirectiveCVFuncId() {
4816   SMLoc FunctionIdLoc = getTok().getLoc();
4817   int64_t FunctionId;
4818 
4819   if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
4820       parseToken(AsmToken::EndOfStatement,
4821                  "unexpected token in '.cv_func_id' directive"))
4822     return true;
4823 
4824   if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
4825     return Error(FunctionIdLoc, "function id already allocated");
4826 
4827   return false;
4828 }
4829 
4830 /// parseDirectiveCVInlineSiteId
4831 /// ::= .cv_inline_site_id FunctionId
4832 ///         "within" IAFunc
4833 ///         "inlined_at" IAFile IALine [IACol]
4834 ///
4835 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
4836 /// at" source location information for use in the line table of the caller,
4837 /// whether the caller is a real function or another inlined call site.
parseDirectiveCVInlineSiteId()4838 bool MasmParser::parseDirectiveCVInlineSiteId() {
4839   SMLoc FunctionIdLoc = getTok().getLoc();
4840   int64_t FunctionId;
4841   int64_t IAFunc;
4842   int64_t IAFile;
4843   int64_t IALine;
4844   int64_t IACol = 0;
4845 
4846   // FunctionId
4847   if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
4848     return true;
4849 
4850   // "within"
4851   if (check((getLexer().isNot(AsmToken::Identifier) ||
4852              getTok().getIdentifier() != "within"),
4853             "expected 'within' identifier in '.cv_inline_site_id' directive"))
4854     return true;
4855   Lex();
4856 
4857   // IAFunc
4858   if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
4859     return true;
4860 
4861   // "inlined_at"
4862   if (check((getLexer().isNot(AsmToken::Identifier) ||
4863              getTok().getIdentifier() != "inlined_at"),
4864             "expected 'inlined_at' identifier in '.cv_inline_site_id' "
4865             "directive") )
4866     return true;
4867   Lex();
4868 
4869   // IAFile IALine
4870   if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
4871       parseIntToken(IALine, "expected line number after 'inlined_at'"))
4872     return true;
4873 
4874   // [IACol]
4875   if (getLexer().is(AsmToken::Integer)) {
4876     IACol = getTok().getIntVal();
4877     Lex();
4878   }
4879 
4880   if (parseToken(AsmToken::EndOfStatement,
4881                  "unexpected token in '.cv_inline_site_id' directive"))
4882     return true;
4883 
4884   if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
4885                                                  IALine, IACol, FunctionIdLoc))
4886     return Error(FunctionIdLoc, "function id already allocated");
4887 
4888   return false;
4889 }
4890 
4891 /// parseDirectiveCVLoc
4892 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
4893 ///                                [is_stmt VALUE]
4894 /// The first number is a file number, must have been previously assigned with
4895 /// a .file directive, the second number is the line number and optionally the
4896 /// third number is a column position (zero if not specified).  The remaining
4897 /// optional items are .loc sub-directives.
parseDirectiveCVLoc()4898 bool MasmParser::parseDirectiveCVLoc() {
4899   SMLoc DirectiveLoc = getTok().getLoc();
4900   int64_t FunctionId, FileNumber;
4901   if (parseCVFunctionId(FunctionId, ".cv_loc") ||
4902       parseCVFileId(FileNumber, ".cv_loc"))
4903     return true;
4904 
4905   int64_t LineNumber = 0;
4906   if (getLexer().is(AsmToken::Integer)) {
4907     LineNumber = getTok().getIntVal();
4908     if (LineNumber < 0)
4909       return TokError("line number less than zero in '.cv_loc' directive");
4910     Lex();
4911   }
4912 
4913   int64_t ColumnPos = 0;
4914   if (getLexer().is(AsmToken::Integer)) {
4915     ColumnPos = getTok().getIntVal();
4916     if (ColumnPos < 0)
4917       return TokError("column position less than zero in '.cv_loc' directive");
4918     Lex();
4919   }
4920 
4921   bool PrologueEnd = false;
4922   uint64_t IsStmt = 0;
4923 
4924   auto parseOp = [&]() -> bool {
4925     StringRef Name;
4926     SMLoc Loc = getTok().getLoc();
4927     if (parseIdentifier(Name))
4928       return TokError("unexpected token in '.cv_loc' directive");
4929     if (Name == "prologue_end")
4930       PrologueEnd = true;
4931     else if (Name == "is_stmt") {
4932       Loc = getTok().getLoc();
4933       const MCExpr *Value;
4934       if (parseExpression(Value))
4935         return true;
4936       // The expression must be the constant 0 or 1.
4937       IsStmt = ~0ULL;
4938       if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
4939         IsStmt = MCE->getValue();
4940 
4941       if (IsStmt > 1)
4942         return Error(Loc, "is_stmt value not 0 or 1");
4943     } else {
4944       return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
4945     }
4946     return false;
4947   };
4948 
4949   if (parseMany(parseOp, false /*hasComma*/))
4950     return true;
4951 
4952   getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
4953                                    ColumnPos, PrologueEnd, IsStmt, StringRef(),
4954                                    DirectiveLoc);
4955   return false;
4956 }
4957 
4958 /// parseDirectiveCVLinetable
4959 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
parseDirectiveCVLinetable()4960 bool MasmParser::parseDirectiveCVLinetable() {
4961   int64_t FunctionId;
4962   StringRef FnStartName, FnEndName;
4963   SMLoc Loc = getTok().getLoc();
4964   if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
4965       parseToken(AsmToken::Comma,
4966                  "unexpected token in '.cv_linetable' directive") ||
4967       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
4968                                   "expected identifier in directive") ||
4969       parseToken(AsmToken::Comma,
4970                  "unexpected token in '.cv_linetable' directive") ||
4971       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
4972                                   "expected identifier in directive"))
4973     return true;
4974 
4975   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
4976   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
4977 
4978   getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
4979   return false;
4980 }
4981 
4982 /// parseDirectiveCVInlineLinetable
4983 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
parseDirectiveCVInlineLinetable()4984 bool MasmParser::parseDirectiveCVInlineLinetable() {
4985   int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
4986   StringRef FnStartName, FnEndName;
4987   SMLoc Loc = getTok().getLoc();
4988   if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
4989       parseTokenLoc(Loc) ||
4990       parseIntToken(
4991           SourceFileId,
4992           "expected SourceField in '.cv_inline_linetable' directive") ||
4993       check(SourceFileId <= 0, Loc,
4994             "File id less than zero in '.cv_inline_linetable' directive") ||
4995       parseTokenLoc(Loc) ||
4996       parseIntToken(
4997           SourceLineNum,
4998           "expected SourceLineNum in '.cv_inline_linetable' directive") ||
4999       check(SourceLineNum < 0, Loc,
5000             "Line number less than zero in '.cv_inline_linetable' directive") ||
5001       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5002                                   "expected identifier in directive") ||
5003       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5004                                   "expected identifier in directive"))
5005     return true;
5006 
5007   if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5008     return true;
5009 
5010   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5011   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5012   getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5013                                                SourceLineNum, FnStartSym,
5014                                                FnEndSym);
5015   return false;
5016 }
5017 
initializeCVDefRangeTypeMap()5018 void MasmParser::initializeCVDefRangeTypeMap() {
5019   CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5020   CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5021   CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5022   CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5023 }
5024 
5025 /// parseDirectiveCVDefRange
5026 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
parseDirectiveCVDefRange()5027 bool MasmParser::parseDirectiveCVDefRange() {
5028   SMLoc Loc;
5029   std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5030   while (getLexer().is(AsmToken::Identifier)) {
5031     Loc = getLexer().getLoc();
5032     StringRef GapStartName;
5033     if (parseIdentifier(GapStartName))
5034       return Error(Loc, "expected identifier in directive");
5035     MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5036 
5037     Loc = getLexer().getLoc();
5038     StringRef GapEndName;
5039     if (parseIdentifier(GapEndName))
5040       return Error(Loc, "expected identifier in directive");
5041     MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5042 
5043     Ranges.push_back({GapStartSym, GapEndSym});
5044   }
5045 
5046   StringRef CVDefRangeTypeStr;
5047   if (parseToken(
5048           AsmToken::Comma,
5049           "expected comma before def_range type in .cv_def_range directive") ||
5050       parseIdentifier(CVDefRangeTypeStr))
5051     return Error(Loc, "expected def_range type in directive");
5052 
5053   StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5054       CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5055   CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5056                                 ? CVDR_DEFRANGE
5057                                 : CVTypeIt->getValue();
5058   switch (CVDRType) {
5059   case CVDR_DEFRANGE_REGISTER: {
5060     int64_t DRRegister;
5061     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5062                                     ".cv_def_range directive") ||
5063         parseAbsoluteExpression(DRRegister))
5064       return Error(Loc, "expected register number");
5065 
5066     codeview::DefRangeRegisterHeader DRHdr;
5067     DRHdr.Register = DRRegister;
5068     DRHdr.MayHaveNoName = 0;
5069     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5070     break;
5071   }
5072   case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5073     int64_t DROffset;
5074     if (parseToken(AsmToken::Comma,
5075                    "expected comma before offset in .cv_def_range directive") ||
5076         parseAbsoluteExpression(DROffset))
5077       return Error(Loc, "expected offset value");
5078 
5079     codeview::DefRangeFramePointerRelHeader DRHdr;
5080     DRHdr.Offset = DROffset;
5081     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5082     break;
5083   }
5084   case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5085     int64_t DRRegister;
5086     int64_t DROffsetInParent;
5087     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5088                                     ".cv_def_range directive") ||
5089         parseAbsoluteExpression(DRRegister))
5090       return Error(Loc, "expected register number");
5091     if (parseToken(AsmToken::Comma,
5092                    "expected comma before offset in .cv_def_range directive") ||
5093         parseAbsoluteExpression(DROffsetInParent))
5094       return Error(Loc, "expected offset value");
5095 
5096     codeview::DefRangeSubfieldRegisterHeader DRHdr;
5097     DRHdr.Register = DRRegister;
5098     DRHdr.MayHaveNoName = 0;
5099     DRHdr.OffsetInParent = DROffsetInParent;
5100     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5101     break;
5102   }
5103   case CVDR_DEFRANGE_REGISTER_REL: {
5104     int64_t DRRegister;
5105     int64_t DRFlags;
5106     int64_t DRBasePointerOffset;
5107     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5108                                     ".cv_def_range directive") ||
5109         parseAbsoluteExpression(DRRegister))
5110       return Error(Loc, "expected register value");
5111     if (parseToken(
5112             AsmToken::Comma,
5113             "expected comma before flag value in .cv_def_range directive") ||
5114         parseAbsoluteExpression(DRFlags))
5115       return Error(Loc, "expected flag value");
5116     if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5117                                     "in .cv_def_range directive") ||
5118         parseAbsoluteExpression(DRBasePointerOffset))
5119       return Error(Loc, "expected base pointer offset value");
5120 
5121     codeview::DefRangeRegisterRelHeader DRHdr;
5122     DRHdr.Register = DRRegister;
5123     DRHdr.Flags = DRFlags;
5124     DRHdr.BasePointerOffset = DRBasePointerOffset;
5125     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5126     break;
5127   }
5128   default:
5129     return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5130   }
5131   return true;
5132 }
5133 
5134 /// parseDirectiveCVString
5135 /// ::= .cv_stringtable "string"
parseDirectiveCVString()5136 bool MasmParser::parseDirectiveCVString() {
5137   std::string Data;
5138   if (checkForValidSection() || parseEscapedString(Data))
5139     return addErrorSuffix(" in '.cv_string' directive");
5140 
5141   // Put the string in the table and emit the offset.
5142   std::pair<StringRef, unsigned> Insertion =
5143       getCVContext().addToStringTable(Data);
5144   getStreamer().emitIntValue(Insertion.second, 4);
5145   return false;
5146 }
5147 
5148 /// parseDirectiveCVStringTable
5149 /// ::= .cv_stringtable
parseDirectiveCVStringTable()5150 bool MasmParser::parseDirectiveCVStringTable() {
5151   getStreamer().emitCVStringTableDirective();
5152   return false;
5153 }
5154 
5155 /// parseDirectiveCVFileChecksums
5156 /// ::= .cv_filechecksums
parseDirectiveCVFileChecksums()5157 bool MasmParser::parseDirectiveCVFileChecksums() {
5158   getStreamer().emitCVFileChecksumsDirective();
5159   return false;
5160 }
5161 
5162 /// parseDirectiveCVFileChecksumOffset
5163 /// ::= .cv_filechecksumoffset fileno
parseDirectiveCVFileChecksumOffset()5164 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5165   int64_t FileNo;
5166   if (parseIntToken(FileNo, "expected identifier in directive"))
5167     return true;
5168   if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5169     return true;
5170   getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5171   return false;
5172 }
5173 
5174 /// parseDirectiveCVFPOData
5175 /// ::= .cv_fpo_data procsym
parseDirectiveCVFPOData()5176 bool MasmParser::parseDirectiveCVFPOData() {
5177   SMLoc DirLoc = getLexer().getLoc();
5178   StringRef ProcName;
5179   if (parseIdentifier(ProcName))
5180     return TokError("expected symbol name");
5181   if (parseEOL("unexpected tokens"))
5182     return addErrorSuffix(" in '.cv_fpo_data' directive");
5183   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5184   getStreamer().EmitCVFPOData(ProcSym, DirLoc);
5185   return false;
5186 }
5187 
5188 /// parseDirectiveCFISections
5189 /// ::= .cfi_sections section [, section]
parseDirectiveCFISections()5190 bool MasmParser::parseDirectiveCFISections() {
5191   StringRef Name;
5192   bool EH = false;
5193   bool Debug = false;
5194 
5195   if (parseIdentifier(Name))
5196     return TokError("Expected an identifier");
5197 
5198   if (Name == ".eh_frame")
5199     EH = true;
5200   else if (Name == ".debug_frame")
5201     Debug = true;
5202 
5203   if (getLexer().is(AsmToken::Comma)) {
5204     Lex();
5205 
5206     if (parseIdentifier(Name))
5207       return TokError("Expected an identifier");
5208 
5209     if (Name == ".eh_frame")
5210       EH = true;
5211     else if (Name == ".debug_frame")
5212       Debug = true;
5213   }
5214 
5215   getStreamer().emitCFISections(EH, Debug);
5216   return false;
5217 }
5218 
5219 /// parseDirectiveCFIStartProc
5220 /// ::= .cfi_startproc [simple]
parseDirectiveCFIStartProc()5221 bool MasmParser::parseDirectiveCFIStartProc() {
5222   StringRef Simple;
5223   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5224     if (check(parseIdentifier(Simple) || Simple != "simple",
5225               "unexpected token") ||
5226         parseToken(AsmToken::EndOfStatement))
5227       return addErrorSuffix(" in '.cfi_startproc' directive");
5228   }
5229 
5230   // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5231   // being produced if this directive is emitted as part of preprocessor macro
5232   // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5233   // Tools like llvm-mc on the other hand are not affected by it, and report
5234   // correct context information.
5235   getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5236   return false;
5237 }
5238 
5239 /// parseDirectiveCFIEndProc
5240 /// ::= .cfi_endproc
parseDirectiveCFIEndProc()5241 bool MasmParser::parseDirectiveCFIEndProc() {
5242   getStreamer().emitCFIEndProc();
5243   return false;
5244 }
5245 
5246 /// parse register name or number.
parseRegisterOrRegisterNumber(int64_t & Register,SMLoc DirectiveLoc)5247 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5248                                                SMLoc DirectiveLoc) {
5249   unsigned RegNo;
5250 
5251   if (getLexer().isNot(AsmToken::Integer)) {
5252     if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5253       return true;
5254     Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5255   } else
5256     return parseAbsoluteExpression(Register);
5257 
5258   return false;
5259 }
5260 
5261 /// parseDirectiveCFIDefCfa
5262 /// ::= .cfi_def_cfa register,  offset
parseDirectiveCFIDefCfa(SMLoc DirectiveLoc)5263 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5264   int64_t Register = 0, Offset = 0;
5265   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5266       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5267       parseAbsoluteExpression(Offset))
5268     return true;
5269 
5270   getStreamer().emitCFIDefCfa(Register, Offset);
5271   return false;
5272 }
5273 
5274 /// parseDirectiveCFIDefCfaOffset
5275 /// ::= .cfi_def_cfa_offset offset
parseDirectiveCFIDefCfaOffset()5276 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5277   int64_t Offset = 0;
5278   if (parseAbsoluteExpression(Offset))
5279     return true;
5280 
5281   getStreamer().emitCFIDefCfaOffset(Offset);
5282   return false;
5283 }
5284 
5285 /// parseDirectiveCFIRegister
5286 /// ::= .cfi_register register, register
parseDirectiveCFIRegister(SMLoc DirectiveLoc)5287 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5288   int64_t Register1 = 0, Register2 = 0;
5289   if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5290       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5291       parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5292     return true;
5293 
5294   getStreamer().emitCFIRegister(Register1, Register2);
5295   return false;
5296 }
5297 
5298 /// parseDirectiveCFIWindowSave
5299 /// ::= .cfi_window_save
parseDirectiveCFIWindowSave()5300 bool MasmParser::parseDirectiveCFIWindowSave() {
5301   getStreamer().emitCFIWindowSave();
5302   return false;
5303 }
5304 
5305 /// parseDirectiveCFIAdjustCfaOffset
5306 /// ::= .cfi_adjust_cfa_offset adjustment
parseDirectiveCFIAdjustCfaOffset()5307 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5308   int64_t Adjustment = 0;
5309   if (parseAbsoluteExpression(Adjustment))
5310     return true;
5311 
5312   getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5313   return false;
5314 }
5315 
5316 /// parseDirectiveCFIDefCfaRegister
5317 /// ::= .cfi_def_cfa_register register
parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc)5318 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5319   int64_t Register = 0;
5320   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5321     return true;
5322 
5323   getStreamer().emitCFIDefCfaRegister(Register);
5324   return false;
5325 }
5326 
5327 /// parseDirectiveCFIOffset
5328 /// ::= .cfi_offset register, offset
parseDirectiveCFIOffset(SMLoc DirectiveLoc)5329 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5330   int64_t Register = 0;
5331   int64_t Offset = 0;
5332 
5333   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5334       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5335       parseAbsoluteExpression(Offset))
5336     return true;
5337 
5338   getStreamer().emitCFIOffset(Register, Offset);
5339   return false;
5340 }
5341 
5342 /// parseDirectiveCFIRelOffset
5343 /// ::= .cfi_rel_offset register, offset
parseDirectiveCFIRelOffset(SMLoc DirectiveLoc)5344 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5345   int64_t Register = 0, Offset = 0;
5346 
5347   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5348       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5349       parseAbsoluteExpression(Offset))
5350     return true;
5351 
5352   getStreamer().emitCFIRelOffset(Register, Offset);
5353   return false;
5354 }
5355 
isValidEncoding(int64_t Encoding)5356 static bool isValidEncoding(int64_t Encoding) {
5357   if (Encoding & ~0xff)
5358     return false;
5359 
5360   if (Encoding == dwarf::DW_EH_PE_omit)
5361     return true;
5362 
5363   const unsigned Format = Encoding & 0xf;
5364   if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5365       Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5366       Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5367       Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5368     return false;
5369 
5370   const unsigned Application = Encoding & 0x70;
5371   if (Application != dwarf::DW_EH_PE_absptr &&
5372       Application != dwarf::DW_EH_PE_pcrel)
5373     return false;
5374 
5375   return true;
5376 }
5377 
5378 /// parseDirectiveCFIPersonalityOrLsda
5379 /// IsPersonality true for cfi_personality, false for cfi_lsda
5380 /// ::= .cfi_personality encoding, [symbol_name]
5381 /// ::= .cfi_lsda encoding, [symbol_name]
parseDirectiveCFIPersonalityOrLsda(bool IsPersonality)5382 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5383   int64_t Encoding = 0;
5384   if (parseAbsoluteExpression(Encoding))
5385     return true;
5386   if (Encoding == dwarf::DW_EH_PE_omit)
5387     return false;
5388 
5389   StringRef Name;
5390   if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5391       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5392       check(parseIdentifier(Name), "expected identifier in directive"))
5393     return true;
5394 
5395   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5396 
5397   if (IsPersonality)
5398     getStreamer().emitCFIPersonality(Sym, Encoding);
5399   else
5400     getStreamer().emitCFILsda(Sym, Encoding);
5401   return false;
5402 }
5403 
5404 /// parseDirectiveCFIRememberState
5405 /// ::= .cfi_remember_state
parseDirectiveCFIRememberState()5406 bool MasmParser::parseDirectiveCFIRememberState() {
5407   getStreamer().emitCFIRememberState();
5408   return false;
5409 }
5410 
5411 /// parseDirectiveCFIRestoreState
5412 /// ::= .cfi_remember_state
parseDirectiveCFIRestoreState()5413 bool MasmParser::parseDirectiveCFIRestoreState() {
5414   getStreamer().emitCFIRestoreState();
5415   return false;
5416 }
5417 
5418 /// parseDirectiveCFISameValue
5419 /// ::= .cfi_same_value register
parseDirectiveCFISameValue(SMLoc DirectiveLoc)5420 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5421   int64_t Register = 0;
5422 
5423   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5424     return true;
5425 
5426   getStreamer().emitCFISameValue(Register);
5427   return false;
5428 }
5429 
5430 /// parseDirectiveCFIRestore
5431 /// ::= .cfi_restore register
parseDirectiveCFIRestore(SMLoc DirectiveLoc)5432 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5433   int64_t Register = 0;
5434   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5435     return true;
5436 
5437   getStreamer().emitCFIRestore(Register);
5438   return false;
5439 }
5440 
5441 /// parseDirectiveCFIEscape
5442 /// ::= .cfi_escape expression[,...]
parseDirectiveCFIEscape()5443 bool MasmParser::parseDirectiveCFIEscape() {
5444   std::string Values;
5445   int64_t CurrValue;
5446   if (parseAbsoluteExpression(CurrValue))
5447     return true;
5448 
5449   Values.push_back((uint8_t)CurrValue);
5450 
5451   while (getLexer().is(AsmToken::Comma)) {
5452     Lex();
5453 
5454     if (parseAbsoluteExpression(CurrValue))
5455       return true;
5456 
5457     Values.push_back((uint8_t)CurrValue);
5458   }
5459 
5460   getStreamer().emitCFIEscape(Values);
5461   return false;
5462 }
5463 
5464 /// parseDirectiveCFIReturnColumn
5465 /// ::= .cfi_return_column register
parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc)5466 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5467   int64_t Register = 0;
5468   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5469     return true;
5470   getStreamer().emitCFIReturnColumn(Register);
5471   return false;
5472 }
5473 
5474 /// parseDirectiveCFISignalFrame
5475 /// ::= .cfi_signal_frame
parseDirectiveCFISignalFrame()5476 bool MasmParser::parseDirectiveCFISignalFrame() {
5477   if (parseToken(AsmToken::EndOfStatement,
5478                  "unexpected token in '.cfi_signal_frame'"))
5479     return true;
5480 
5481   getStreamer().emitCFISignalFrame();
5482   return false;
5483 }
5484 
5485 /// parseDirectiveCFIUndefined
5486 /// ::= .cfi_undefined register
parseDirectiveCFIUndefined(SMLoc DirectiveLoc)5487 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5488   int64_t Register = 0;
5489 
5490   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5491     return true;
5492 
5493   getStreamer().emitCFIUndefined(Register);
5494   return false;
5495 }
5496 
5497 /// parseDirectiveMacro
5498 /// ::= name macro [parameters]
5499 ///     ["LOCAL" identifiers]
5500 ///   parameters ::= parameter [, parameter]*
5501 ///   parameter ::= name ":" qualifier
5502 ///   qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)5503 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5504   MCAsmMacroParameters Parameters;
5505   while (getLexer().isNot(AsmToken::EndOfStatement)) {
5506     if (!Parameters.empty() && Parameters.back().Vararg)
5507       return Error(Lexer.getLoc(),
5508                    "Vararg parameter '" + Parameters.back().Name +
5509                        "' should be last in the list of parameters");
5510 
5511     MCAsmMacroParameter Parameter;
5512     if (parseIdentifier(Parameter.Name))
5513       return TokError("expected identifier in 'macro' directive");
5514 
5515     // Emit an error if two (or more) named parameters share the same name.
5516     for (const MCAsmMacroParameter& CurrParam : Parameters)
5517       if (CurrParam.Name.equals_lower(Parameter.Name))
5518         return TokError("macro '" + Name + "' has multiple parameters"
5519                         " named '" + Parameter.Name + "'");
5520 
5521     if (Lexer.is(AsmToken::Colon)) {
5522       Lex();  // consume ':'
5523 
5524       if (parseOptionalToken(AsmToken::Equal)) {
5525         // Default value
5526         SMLoc ParamLoc;
5527 
5528         ParamLoc = Lexer.getLoc();
5529         if (parseMacroArgument(nullptr, Parameter.Value))
5530           return true;
5531       } else {
5532         SMLoc QualLoc;
5533         StringRef Qualifier;
5534 
5535         QualLoc = Lexer.getLoc();
5536         if (parseIdentifier(Qualifier))
5537           return Error(QualLoc, "missing parameter qualifier for "
5538                                 "'" +
5539                                     Parameter.Name + "' in macro '" + Name +
5540                                     "'");
5541 
5542         if (Qualifier.equals_lower("req"))
5543           Parameter.Required = true;
5544         else if (Qualifier.equals_lower("vararg"))
5545           Parameter.Vararg = true;
5546         else
5547           return Error(QualLoc,
5548                        Qualifier + " is not a valid parameter qualifier for '" +
5549                            Parameter.Name + "' in macro '" + Name + "'");
5550       }
5551     }
5552 
5553     Parameters.push_back(std::move(Parameter));
5554 
5555     if (getLexer().is(AsmToken::Comma))
5556       Lex();
5557   }
5558 
5559   // Eat just the end of statement.
5560   Lexer.Lex();
5561 
5562   std::vector<std::string> Locals;
5563   if (getTok().is(AsmToken::Identifier) &&
5564       getTok().getIdentifier().equals_lower("local")) {
5565     Lex(); // Eat the LOCAL directive.
5566 
5567     StringRef ID;
5568     while (true) {
5569       if (parseIdentifier(ID))
5570         return true;
5571       Locals.push_back(ID.lower());
5572 
5573       // If we see a comma, continue (and allow line continuation).
5574       if (!parseOptionalToken(AsmToken::Comma))
5575         break;
5576       parseOptionalToken(AsmToken::EndOfStatement);
5577     }
5578   }
5579 
5580   // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5581   AsmToken EndToken, StartToken = getTok();
5582   unsigned MacroDepth = 0;
5583   bool IsMacroFunction = false;
5584   // Lex the macro definition.
5585   while (true) {
5586     // Ignore Lexing errors in macros.
5587     while (Lexer.is(AsmToken::Error)) {
5588       Lexer.Lex();
5589     }
5590 
5591     // Check whether we have reached the end of the file.
5592     if (getLexer().is(AsmToken::Eof))
5593       return Error(NameLoc, "no matching 'endm' in definition");
5594 
5595     // Otherwise, check whether we have reached the 'endm'... and determine if
5596     // this is a macro function.
5597     if (getLexer().is(AsmToken::Identifier)) {
5598       if (getTok().getIdentifier().equals_lower("endm")) {
5599         if (MacroDepth == 0) { // Outermost macro.
5600           EndToken = getTok();
5601           Lexer.Lex();
5602           if (getLexer().isNot(AsmToken::EndOfStatement))
5603             return TokError("unexpected token in '" + EndToken.getIdentifier() +
5604                             "' directive");
5605           break;
5606         } else {
5607           // Otherwise we just found the end of an inner macro.
5608           --MacroDepth;
5609         }
5610       } else if (getTok().getIdentifier().equals_lower("exitm")) {
5611         if (MacroDepth == 0 &&
5612             getLexer().peekTok().isNot(AsmToken::EndOfStatement)) {
5613           IsMacroFunction = true;
5614         }
5615       } else if (isMacroLikeDirective()) {
5616         // We allow nested macros. Those aren't instantiated until the
5617         // outermost macro is expanded so just ignore them for now.
5618         ++MacroDepth;
5619       }
5620     }
5621 
5622     // Otherwise, scan til the end of the statement.
5623     eatToEndOfStatement();
5624   }
5625 
5626   if (getContext().lookupMacro(Name.lower())) {
5627     return Error(NameLoc, "macro '" + Name + "' is already defined");
5628   }
5629 
5630   const char *BodyStart = StartToken.getLoc().getPointer();
5631   const char *BodyEnd = EndToken.getLoc().getPointer();
5632   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5633   MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5634                    IsMacroFunction);
5635   DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5636                   Macro.dump());
5637   getContext().defineMacro(Name, std::move(Macro));
5638   return false;
5639 }
5640 
5641 /// parseDirectiveExitMacro
5642 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)5643 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5644                                          StringRef Directive,
5645                                          std::string &Value) {
5646   SMLoc EndLoc = getTok().getLoc();
5647   if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5648     return Error(EndLoc,
5649                  "unable to parse text item in '" + Directive + "' directive");
5650   eatToEndOfStatement();
5651 
5652   if (!isInsideMacroInstantiation())
5653     return TokError("unexpected '" + Directive + "' in file, "
5654                                                  "no current macro definition");
5655 
5656   // Exit all conditionals that are active in the current macro.
5657   while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5658     TheCondState = TheCondStack.back();
5659     TheCondStack.pop_back();
5660   }
5661 
5662   handleMacroExit();
5663   return false;
5664 }
5665 
5666 /// parseDirectiveEndMacro
5667 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)5668 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5669   if (getLexer().isNot(AsmToken::EndOfStatement))
5670     return TokError("unexpected token in '" + Directive + "' directive");
5671 
5672   // If we are inside a macro instantiation, terminate the current
5673   // instantiation.
5674   if (isInsideMacroInstantiation()) {
5675     handleMacroExit();
5676     return false;
5677   }
5678 
5679   // Otherwise, this .endmacro is a stray entry in the file; well formed
5680   // .endmacro directives are handled during the macro definition parsing.
5681   return TokError("unexpected '" + Directive + "' in file, "
5682                                                "no current macro definition");
5683 }
5684 
5685 /// parseDirectivePurgeMacro
5686 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)5687 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5688   StringRef Name;
5689   while (true) {
5690     SMLoc NameLoc;
5691     if (parseTokenLoc(NameLoc) ||
5692         check(parseIdentifier(Name), NameLoc,
5693               "expected identifier in 'purge' directive"))
5694       return true;
5695 
5696     DEBUG_WITH_TYPE("asm-macros", dbgs()
5697                                       << "Un-defining macro: " << Name << "\n");
5698     if (!getContext().lookupMacro(Name.lower()))
5699       return Error(NameLoc, "macro '" + Name + "' is not defined");
5700     getContext().undefineMacro(Name.lower());
5701 
5702     if (!parseOptionalToken(AsmToken::Comma))
5703       break;
5704     parseOptionalToken(AsmToken::EndOfStatement);
5705   }
5706 
5707   return false;
5708 }
5709 
5710 /// parseDirectiveSymbolAttribute
5711 ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)5712 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5713   auto parseOp = [&]() -> bool {
5714     StringRef Name;
5715     SMLoc Loc = getTok().getLoc();
5716     if (parseIdentifier(Name))
5717       return Error(Loc, "expected identifier");
5718     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5719 
5720     // Assembler local symbols don't make any sense here. Complain loudly.
5721     if (Sym->isTemporary())
5722       return Error(Loc, "non-local symbol required");
5723 
5724     if (!getStreamer().emitSymbolAttribute(Sym, Attr))
5725       return Error(Loc, "unable to emit symbol attribute");
5726     return false;
5727   };
5728 
5729   if (parseMany(parseOp))
5730     return addErrorSuffix(" in directive");
5731   return false;
5732 }
5733 
5734 /// parseDirectiveComm
5735 ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)5736 bool MasmParser::parseDirectiveComm(bool IsLocal) {
5737   if (checkForValidSection())
5738     return true;
5739 
5740   SMLoc IDLoc = getLexer().getLoc();
5741   StringRef Name;
5742   if (parseIdentifier(Name))
5743     return TokError("expected identifier in directive");
5744 
5745   // Handle the identifier as the key symbol.
5746   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5747 
5748   if (getLexer().isNot(AsmToken::Comma))
5749     return TokError("unexpected token in directive");
5750   Lex();
5751 
5752   int64_t Size;
5753   SMLoc SizeLoc = getLexer().getLoc();
5754   if (parseAbsoluteExpression(Size))
5755     return true;
5756 
5757   int64_t Pow2Alignment = 0;
5758   SMLoc Pow2AlignmentLoc;
5759   if (getLexer().is(AsmToken::Comma)) {
5760     Lex();
5761     Pow2AlignmentLoc = getLexer().getLoc();
5762     if (parseAbsoluteExpression(Pow2Alignment))
5763       return true;
5764 
5765     LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
5766     if (IsLocal && LCOMM == LCOMM::NoAlignment)
5767       return Error(Pow2AlignmentLoc, "alignment not supported on this target");
5768 
5769     // If this target takes alignments in bytes (not log) validate and convert.
5770     if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
5771         (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
5772       if (!isPowerOf2_64(Pow2Alignment))
5773         return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
5774       Pow2Alignment = Log2_64(Pow2Alignment);
5775     }
5776   }
5777 
5778   if (parseToken(AsmToken::EndOfStatement,
5779                  "unexpected token in '.comm' or '.lcomm' directive"))
5780     return true;
5781 
5782   // NOTE: a size of zero for a .comm should create a undefined symbol
5783   // but a size of .lcomm creates a bss symbol of size zero.
5784   if (Size < 0)
5785     return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
5786                           "be less than zero");
5787 
5788   // NOTE: The alignment in the directive is a power of 2 value, the assembler
5789   // may internally end up wanting an alignment in bytes.
5790   // FIXME: Diagnose overflow.
5791   if (Pow2Alignment < 0)
5792     return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
5793                                    "alignment, can't be less than zero");
5794 
5795   Sym->redefineIfPossible();
5796   if (!Sym->isUndefined())
5797     return Error(IDLoc, "invalid symbol redefinition");
5798 
5799   // Create the Symbol as a common or local common with Size and Pow2Alignment.
5800   if (IsLocal) {
5801     getStreamer().emitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
5802     return false;
5803   }
5804 
5805   getStreamer().emitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
5806   return false;
5807 }
5808 
5809 /// parseDirectiveComment
5810 ///  ::= comment delimiter [[text]]
5811 ///              [[text]]
5812 ///              [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)5813 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
5814   std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
5815   size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
5816   StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
5817   if (Delimiter.empty())
5818     return Error(DirectiveLoc, "no delimiter in 'comment' directive");
5819   do {
5820     if (getTok().is(AsmToken::Eof))
5821       return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
5822     Lex();  // eat end of statement
5823   } while (
5824       !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
5825   return parseToken(AsmToken::EndOfStatement,
5826                     "unexpected token in 'comment' directive");
5827 }
5828 
5829 /// parseDirectiveInclude
5830 ///  ::= include <filename>
5831 ///    | include filename
parseDirectiveInclude()5832 bool MasmParser::parseDirectiveInclude() {
5833   // Allow the strings to have escaped octal character sequence.
5834   std::string Filename;
5835   SMLoc IncludeLoc = getTok().getLoc();
5836 
5837   if (!parseAngleBracketString(Filename))
5838     Filename = parseStringTo(AsmToken::EndOfStatement);
5839   if (check(!Filename.empty(), "missing filename in 'include' directive") ||
5840       check(getTok().isNot(AsmToken::EndOfStatement),
5841             "unexpected token in 'include' directive") ||
5842       // Attempt to switch the lexer to the included file before consuming the
5843       // end of statement to avoid losing it when we switch.
5844       check(enterIncludeFile(Filename), IncludeLoc,
5845             "Could not find include file '" + Filename + "'"))
5846     return true;
5847 
5848   return false;
5849 }
5850 
5851 /// parseDirectiveIf
5852 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)5853 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
5854   TheCondStack.push_back(TheCondState);
5855   TheCondState.TheCond = AsmCond::IfCond;
5856   if (TheCondState.Ignore) {
5857     eatToEndOfStatement();
5858   } else {
5859     int64_t ExprValue;
5860     if (parseAbsoluteExpression(ExprValue) ||
5861         parseToken(AsmToken::EndOfStatement,
5862                    "unexpected token in '.if' directive"))
5863       return true;
5864 
5865     switch (DirKind) {
5866     default:
5867       llvm_unreachable("unsupported directive");
5868     case DK_IF:
5869       break;
5870     case DK_IFE:
5871       ExprValue = ExprValue == 0;
5872       break;
5873     }
5874 
5875     TheCondState.CondMet = ExprValue;
5876     TheCondState.Ignore = !TheCondState.CondMet;
5877   }
5878 
5879   return false;
5880 }
5881 
5882 /// parseDirectiveIfb
5883 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)5884 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
5885   TheCondStack.push_back(TheCondState);
5886   TheCondState.TheCond = AsmCond::IfCond;
5887 
5888   if (TheCondState.Ignore) {
5889     eatToEndOfStatement();
5890   } else {
5891     std::string Str;
5892     if (parseTextItem(Str))
5893       return TokError("expected text item parameter for 'ifb' directive");
5894 
5895     if (parseToken(AsmToken::EndOfStatement,
5896                    "unexpected token in 'ifb' directive"))
5897       return true;
5898 
5899     TheCondState.CondMet = ExpectBlank == Str.empty();
5900     TheCondState.Ignore = !TheCondState.CondMet;
5901   }
5902 
5903   return false;
5904 }
5905 
5906 /// parseDirectiveIfidn
5907 ///   ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)5908 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
5909                                      bool CaseInsensitive) {
5910   std::string String1, String2;
5911 
5912   if (parseTextItem(String1)) {
5913     if (ExpectEqual)
5914       return TokError("expected text item parameter for 'ifidn' directive");
5915     return TokError("expected text item parameter for 'ifdif' directive");
5916   }
5917 
5918   if (Lexer.isNot(AsmToken::Comma)) {
5919     if (ExpectEqual)
5920       return TokError(
5921           "expected comma after first string for 'ifidn' directive");
5922     return TokError("expected comma after first string for 'ifdif' directive");
5923   }
5924   Lex();
5925 
5926   if (parseTextItem(String2)) {
5927     if (ExpectEqual)
5928       return TokError("expected text item parameter for 'ifidn' directive");
5929     return TokError("expected text item parameter for 'ifdif' directive");
5930   }
5931 
5932   TheCondStack.push_back(TheCondState);
5933   TheCondState.TheCond = AsmCond::IfCond;
5934   if (CaseInsensitive)
5935     TheCondState.CondMet =
5936         ExpectEqual == (StringRef(String1).equals_lower(String2));
5937   else
5938     TheCondState.CondMet = ExpectEqual == (String1 == String2);
5939   TheCondState.Ignore = !TheCondState.CondMet;
5940 
5941   return false;
5942 }
5943 
5944 /// parseDirectiveIfdef
5945 /// ::= ifdef symbol
5946 ///   | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)5947 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
5948   TheCondStack.push_back(TheCondState);
5949   TheCondState.TheCond = AsmCond::IfCond;
5950 
5951   if (TheCondState.Ignore) {
5952     eatToEndOfStatement();
5953   } else {
5954     bool is_defined = false;
5955     unsigned RegNo;
5956     SMLoc StartLoc, EndLoc;
5957     is_defined = (getTargetParser().tryParseRegister(
5958                       RegNo, StartLoc, EndLoc) == MatchOperand_Success);
5959     if (!is_defined) {
5960       StringRef Name;
5961       if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
5962           parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'"))
5963         return true;
5964 
5965       if (Variables.find(Name.lower()) != Variables.end()) {
5966         is_defined = true;
5967       } else {
5968         MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
5969         is_defined = (Sym && !Sym->isUndefined(false));
5970       }
5971     }
5972 
5973     TheCondState.CondMet = (is_defined == expect_defined);
5974     TheCondState.Ignore = !TheCondState.CondMet;
5975   }
5976 
5977   return false;
5978 }
5979 
5980 /// parseDirectiveElseIf
5981 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)5982 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
5983                                       DirectiveKind DirKind) {
5984   if (TheCondState.TheCond != AsmCond::IfCond &&
5985       TheCondState.TheCond != AsmCond::ElseIfCond)
5986     return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
5987                                " .if or  an .elseif");
5988   TheCondState.TheCond = AsmCond::ElseIfCond;
5989 
5990   bool LastIgnoreState = false;
5991   if (!TheCondStack.empty())
5992     LastIgnoreState = TheCondStack.back().Ignore;
5993   if (LastIgnoreState || TheCondState.CondMet) {
5994     TheCondState.Ignore = true;
5995     eatToEndOfStatement();
5996   } else {
5997     int64_t ExprValue;
5998     if (parseAbsoluteExpression(ExprValue))
5999       return true;
6000 
6001     if (parseToken(AsmToken::EndOfStatement,
6002                    "unexpected token in '.elseif' directive"))
6003       return true;
6004 
6005     switch (DirKind) {
6006     default:
6007       llvm_unreachable("unsupported directive");
6008     case DK_ELSEIF:
6009       break;
6010     case DK_ELSEIFE:
6011       ExprValue = ExprValue == 0;
6012       break;
6013     }
6014 
6015     TheCondState.CondMet = ExprValue;
6016     TheCondState.Ignore = !TheCondState.CondMet;
6017   }
6018 
6019   return false;
6020 }
6021 
6022 /// parseDirectiveElseIfb
6023 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)6024 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6025   if (TheCondState.TheCond != AsmCond::IfCond &&
6026       TheCondState.TheCond != AsmCond::ElseIfCond)
6027     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6028                                " if or an elseif");
6029   TheCondState.TheCond = AsmCond::ElseIfCond;
6030 
6031   bool LastIgnoreState = false;
6032   if (!TheCondStack.empty())
6033     LastIgnoreState = TheCondStack.back().Ignore;
6034   if (LastIgnoreState || TheCondState.CondMet) {
6035     TheCondState.Ignore = true;
6036     eatToEndOfStatement();
6037   } else {
6038     std::string Str;
6039     if (parseTextItem(Str)) {
6040       if (ExpectBlank)
6041         return TokError("expected text item parameter for 'elseifb' directive");
6042       return TokError("expected text item parameter for 'elseifnb' directive");
6043     }
6044 
6045     if (parseToken(AsmToken::EndOfStatement,
6046                    "unexpected token in 'elseifb' directive"))
6047       return true;
6048 
6049     TheCondState.CondMet = ExpectBlank == Str.empty();
6050     TheCondState.Ignore = !TheCondState.CondMet;
6051   }
6052 
6053   return false;
6054 }
6055 
6056 /// parseDirectiveElseIfdef
6057 /// ::= elseifdef symbol
6058 ///   | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)6059 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6060                                          bool expect_defined) {
6061   if (TheCondState.TheCond != AsmCond::IfCond &&
6062       TheCondState.TheCond != AsmCond::ElseIfCond)
6063     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6064                                " if or an elseif");
6065   TheCondState.TheCond = AsmCond::ElseIfCond;
6066 
6067   bool LastIgnoreState = false;
6068   if (!TheCondStack.empty())
6069     LastIgnoreState = TheCondStack.back().Ignore;
6070   if (LastIgnoreState || TheCondState.CondMet) {
6071     TheCondState.Ignore = true;
6072     eatToEndOfStatement();
6073   } else {
6074     bool is_defined = false;
6075     unsigned RegNo;
6076     SMLoc StartLoc, EndLoc;
6077     is_defined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6078                   MatchOperand_Success);
6079     if (!is_defined) {
6080       StringRef Name;
6081       if (check(parseIdentifier(Name),
6082                 "expected identifier after 'elseifdef'") ||
6083           parseToken(AsmToken::EndOfStatement,
6084                      "unexpected token in 'elseifdef'"))
6085         return true;
6086 
6087       if (Variables.find(Name.lower()) != Variables.end()) {
6088         is_defined = true;
6089       } else {
6090         MCSymbol *Sym = getContext().lookupSymbol(Name);
6091         is_defined = (Sym && !Sym->isUndefined(false));
6092       }
6093     }
6094 
6095     TheCondState.CondMet = (is_defined == expect_defined);
6096     TheCondState.Ignore = !TheCondState.CondMet;
6097   }
6098 
6099   return false;
6100 }
6101 
6102 /// parseDirectiveElseIfidn
6103 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6104 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6105                                          bool CaseInsensitive) {
6106   if (TheCondState.TheCond != AsmCond::IfCond &&
6107       TheCondState.TheCond != AsmCond::ElseIfCond)
6108     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6109                                " if or an elseif");
6110   TheCondState.TheCond = AsmCond::ElseIfCond;
6111 
6112   bool LastIgnoreState = false;
6113   if (!TheCondStack.empty())
6114     LastIgnoreState = TheCondStack.back().Ignore;
6115   if (LastIgnoreState || TheCondState.CondMet) {
6116     TheCondState.Ignore = true;
6117     eatToEndOfStatement();
6118   } else {
6119     std::string String1, String2;
6120 
6121     if (parseTextItem(String1)) {
6122       if (ExpectEqual)
6123         return TokError(
6124             "expected text item parameter for 'elseifidn' directive");
6125       return TokError("expected text item parameter for 'elseifdif' directive");
6126     }
6127 
6128     if (Lexer.isNot(AsmToken::Comma)) {
6129       if (ExpectEqual)
6130         return TokError(
6131             "expected comma after first string for 'elseifidn' directive");
6132       return TokError(
6133           "expected comma after first string for 'elseifdif' directive");
6134     }
6135     Lex();
6136 
6137     if (parseTextItem(String2)) {
6138       if (ExpectEqual)
6139         return TokError(
6140             "expected text item parameter for 'elseifidn' directive");
6141       return TokError("expected text item parameter for 'elseifdif' directive");
6142     }
6143 
6144     if (CaseInsensitive)
6145       TheCondState.CondMet =
6146           ExpectEqual == (StringRef(String1).equals_lower(String2));
6147     else
6148       TheCondState.CondMet = ExpectEqual == (String1 == String2);
6149     TheCondState.Ignore = !TheCondState.CondMet;
6150   }
6151 
6152   return false;
6153 }
6154 
6155 /// parseDirectiveElse
6156 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)6157 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6158   if (parseToken(AsmToken::EndOfStatement,
6159                  "unexpected token in 'else' directive"))
6160     return true;
6161 
6162   if (TheCondState.TheCond != AsmCond::IfCond &&
6163       TheCondState.TheCond != AsmCond::ElseIfCond)
6164     return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6165                                " or an elseif");
6166   TheCondState.TheCond = AsmCond::ElseCond;
6167   bool LastIgnoreState = false;
6168   if (!TheCondStack.empty())
6169     LastIgnoreState = TheCondStack.back().Ignore;
6170   if (LastIgnoreState || TheCondState.CondMet)
6171     TheCondState.Ignore = true;
6172   else
6173     TheCondState.Ignore = false;
6174 
6175   return false;
6176 }
6177 
6178 /// parseDirectiveEnd
6179 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)6180 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6181   if (parseToken(AsmToken::EndOfStatement,
6182                  "unexpected token in 'end' directive"))
6183     return true;
6184 
6185   while (Lexer.isNot(AsmToken::Eof))
6186     Lexer.Lex();
6187 
6188   return false;
6189 }
6190 
6191 /// parseDirectiveError
6192 ///   ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)6193 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6194   if (!TheCondStack.empty()) {
6195     if (TheCondStack.back().Ignore) {
6196       eatToEndOfStatement();
6197       return false;
6198     }
6199   }
6200 
6201   std::string Message = ".err directive invoked in source file";
6202   if (Lexer.isNot(AsmToken::EndOfStatement))
6203     Message = parseStringTo(AsmToken::EndOfStatement);
6204   Lex();
6205 
6206   return Error(DirectiveLoc, Message);
6207 }
6208 
6209 /// parseDirectiveErrorIfb
6210 ///   ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)6211 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6212   if (!TheCondStack.empty()) {
6213     if (TheCondStack.back().Ignore) {
6214       eatToEndOfStatement();
6215       return false;
6216     }
6217   }
6218 
6219   std::string Text;
6220   if (parseTextItem(Text))
6221     return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6222 
6223   std::string Message = ".errb directive invoked in source file";
6224   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6225     if (parseToken(AsmToken::Comma))
6226       return addErrorSuffix(" in '.errb' directive");
6227     Message = parseStringTo(AsmToken::EndOfStatement);
6228   }
6229   Lex();
6230 
6231   if (Text.empty() == ExpectBlank)
6232     return Error(DirectiveLoc, Message);
6233   return false;
6234 }
6235 
6236 /// parseDirectiveErrorIfdef
6237 ///   ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)6238 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6239                                           bool ExpectDefined) {
6240   if (!TheCondStack.empty()) {
6241     if (TheCondStack.back().Ignore) {
6242       eatToEndOfStatement();
6243       return false;
6244     }
6245   }
6246 
6247   bool IsDefined = false;
6248   unsigned RegNo;
6249   SMLoc StartLoc, EndLoc;
6250   IsDefined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6251                MatchOperand_Success);
6252   if (!IsDefined) {
6253     StringRef Name;
6254     if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6255       return true;
6256 
6257     if (Variables.find(Name.lower()) != Variables.end()) {
6258       IsDefined = true;
6259     } else {
6260       MCSymbol *Sym = getContext().lookupSymbol(Name);
6261       IsDefined = (Sym && !Sym->isUndefined(false));
6262     }
6263   }
6264 
6265   std::string Message = ".errdef directive invoked in source file";
6266   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6267     if (parseToken(AsmToken::Comma))
6268       return addErrorSuffix(" in '.errdef' directive");
6269     Message = parseStringTo(AsmToken::EndOfStatement);
6270   }
6271   Lex();
6272 
6273   if (IsDefined == ExpectDefined)
6274     return Error(DirectiveLoc, Message);
6275   return false;
6276 }
6277 
6278 /// parseDirectiveErrorIfidn
6279 ///   ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6280 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6281                                           bool CaseInsensitive) {
6282   if (!TheCondStack.empty()) {
6283     if (TheCondStack.back().Ignore) {
6284       eatToEndOfStatement();
6285       return false;
6286     }
6287   }
6288 
6289   std::string String1, String2;
6290 
6291   if (parseTextItem(String1)) {
6292     if (ExpectEqual)
6293       return TokError("expected string parameter for '.erridn' directive");
6294     return TokError("expected string parameter for '.errdif' directive");
6295   }
6296 
6297   if (Lexer.isNot(AsmToken::Comma)) {
6298     if (ExpectEqual)
6299       return TokError(
6300           "expected comma after first string for '.erridn' directive");
6301     return TokError(
6302         "expected comma after first string for '.errdif' directive");
6303   }
6304   Lex();
6305 
6306   if (parseTextItem(String2)) {
6307     if (ExpectEqual)
6308       return TokError("expected string parameter for '.erridn' directive");
6309     return TokError("expected string parameter for '.errdif' directive");
6310   }
6311 
6312   std::string Message;
6313   if (ExpectEqual)
6314     Message = ".erridn directive invoked in source file";
6315   else
6316     Message = ".errdif directive invoked in source file";
6317   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6318     if (parseToken(AsmToken::Comma))
6319       return addErrorSuffix(" in '.erridn' directive");
6320     Message = parseStringTo(AsmToken::EndOfStatement);
6321   }
6322   Lex();
6323 
6324   if (CaseInsensitive)
6325     TheCondState.CondMet =
6326         ExpectEqual == (StringRef(String1).equals_lower(String2));
6327   else
6328     TheCondState.CondMet = ExpectEqual == (String1 == String2);
6329   TheCondState.Ignore = !TheCondState.CondMet;
6330 
6331   if ((CaseInsensitive &&
6332        ExpectEqual == StringRef(String1).equals_lower(String2)) ||
6333       (ExpectEqual == (String1 == String2)))
6334     return Error(DirectiveLoc, Message);
6335   return false;
6336 }
6337 
6338 /// parseDirectiveErrorIfe
6339 ///   ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)6340 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6341   if (!TheCondStack.empty()) {
6342     if (TheCondStack.back().Ignore) {
6343       eatToEndOfStatement();
6344       return false;
6345     }
6346   }
6347 
6348   int64_t ExprValue;
6349   if (parseAbsoluteExpression(ExprValue))
6350     return addErrorSuffix(" in '.erre' directive");
6351 
6352   std::string Message = ".erre directive invoked in source file";
6353   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6354     if (parseToken(AsmToken::Comma))
6355       return addErrorSuffix(" in '.erre' directive");
6356     Message = parseStringTo(AsmToken::EndOfStatement);
6357   }
6358   Lex();
6359 
6360   if ((ExprValue == 0) == ExpectZero)
6361     return Error(DirectiveLoc, Message);
6362   return false;
6363 }
6364 
6365 /// parseDirectiveEndIf
6366 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)6367 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6368   if (parseToken(AsmToken::EndOfStatement,
6369                  "unexpected token in '.endif' directive"))
6370     return true;
6371 
6372   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6373     return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6374                                "an .if or .else");
6375   if (!TheCondStack.empty()) {
6376     TheCondState = TheCondStack.back();
6377     TheCondStack.pop_back();
6378   }
6379 
6380   return false;
6381 }
6382 
initializeDirectiveKindMap()6383 void MasmParser::initializeDirectiveKindMap() {
6384   DirectiveKindMap["="] = DK_ASSIGN;
6385   DirectiveKindMap["equ"] = DK_EQU;
6386   DirectiveKindMap["textequ"] = DK_TEXTEQU;
6387   // DirectiveKindMap[".ascii"] = DK_ASCII;
6388   // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6389   // DirectiveKindMap[".string"] = DK_STRING;
6390   DirectiveKindMap["byte"] = DK_BYTE;
6391   DirectiveKindMap["sbyte"] = DK_SBYTE;
6392   DirectiveKindMap["word"] = DK_WORD;
6393   DirectiveKindMap["sword"] = DK_SWORD;
6394   DirectiveKindMap["dword"] = DK_DWORD;
6395   DirectiveKindMap["sdword"] = DK_SDWORD;
6396   DirectiveKindMap["fword"] = DK_FWORD;
6397   DirectiveKindMap["qword"] = DK_QWORD;
6398   DirectiveKindMap["sqword"] = DK_SQWORD;
6399   DirectiveKindMap["real4"] = DK_REAL4;
6400   DirectiveKindMap["real8"] = DK_REAL8;
6401   DirectiveKindMap["real10"] = DK_REAL10;
6402   DirectiveKindMap["align"] = DK_ALIGN;
6403   // DirectiveKindMap[".org"] = DK_ORG;
6404   DirectiveKindMap["extern"] = DK_EXTERN;
6405   DirectiveKindMap["public"] = DK_PUBLIC;
6406   // DirectiveKindMap[".comm"] = DK_COMM;
6407   DirectiveKindMap["comment"] = DK_COMMENT;
6408   DirectiveKindMap["include"] = DK_INCLUDE;
6409   DirectiveKindMap["repeat"] = DK_REPEAT;
6410   DirectiveKindMap["rept"] = DK_REPEAT;
6411   DirectiveKindMap["while"] = DK_WHILE;
6412   DirectiveKindMap["for"] = DK_FOR;
6413   DirectiveKindMap["irp"] = DK_FOR;
6414   DirectiveKindMap["forc"] = DK_FORC;
6415   DirectiveKindMap["irpc"] = DK_FORC;
6416   DirectiveKindMap["if"] = DK_IF;
6417   DirectiveKindMap["ife"] = DK_IFE;
6418   DirectiveKindMap["ifb"] = DK_IFB;
6419   DirectiveKindMap["ifnb"] = DK_IFNB;
6420   DirectiveKindMap["ifdef"] = DK_IFDEF;
6421   DirectiveKindMap["ifndef"] = DK_IFNDEF;
6422   DirectiveKindMap["ifdif"] = DK_IFDIF;
6423   DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6424   DirectiveKindMap["ifidn"] = DK_IFIDN;
6425   DirectiveKindMap["ifidni"] = DK_IFIDNI;
6426   DirectiveKindMap["elseif"] = DK_ELSEIF;
6427   DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6428   DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6429   DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6430   DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6431   DirectiveKindMap["else"] = DK_ELSE;
6432   DirectiveKindMap["end"] = DK_END;
6433   DirectiveKindMap["endif"] = DK_ENDIF;
6434   // DirectiveKindMap[".file"] = DK_FILE;
6435   // DirectiveKindMap[".line"] = DK_LINE;
6436   // DirectiveKindMap[".loc"] = DK_LOC;
6437   // DirectiveKindMap[".stabs"] = DK_STABS;
6438   // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6439   // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6440   // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6441   // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6442   // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6443   // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6444   // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6445   // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6446   // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6447   // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6448   // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6449   // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6450   // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6451   // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6452   // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6453   // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6454   // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6455   // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6456   // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6457   // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6458   // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6459   // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6460   // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6461   // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6462   // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6463   // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6464   // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6465   // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6466   // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6467   // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6468   // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6469   // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6470   // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6471   // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6472   DirectiveKindMap["macro"] = DK_MACRO;
6473   DirectiveKindMap["exitm"] = DK_EXITM;
6474   DirectiveKindMap["endm"] = DK_ENDM;
6475   DirectiveKindMap["purge"] = DK_PURGE;
6476   DirectiveKindMap[".err"] = DK_ERR;
6477   DirectiveKindMap[".errb"] = DK_ERRB;
6478   DirectiveKindMap[".errnb"] = DK_ERRNB;
6479   DirectiveKindMap[".errdef"] = DK_ERRDEF;
6480   DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6481   DirectiveKindMap[".errdif"] = DK_ERRDIF;
6482   DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6483   DirectiveKindMap[".erridn"] = DK_ERRIDN;
6484   DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6485   DirectiveKindMap[".erre"] = DK_ERRE;
6486   DirectiveKindMap[".errnz"] = DK_ERRNZ;
6487   DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6488   DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6489   DirectiveKindMap[".savereg"] = DK_SAVEREG;
6490   DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6491   DirectiveKindMap[".setframe"] = DK_SETFRAME;
6492   DirectiveKindMap[".radix"] = DK_RADIX;
6493   DirectiveKindMap["db"] = DK_DB;
6494   DirectiveKindMap["dd"] = DK_DD;
6495   DirectiveKindMap["df"] = DK_DF;
6496   DirectiveKindMap["dq"] = DK_DQ;
6497   DirectiveKindMap["dw"] = DK_DW;
6498   DirectiveKindMap["echo"] = DK_ECHO;
6499   DirectiveKindMap["struc"] = DK_STRUCT;
6500   DirectiveKindMap["struct"] = DK_STRUCT;
6501   DirectiveKindMap["union"] = DK_UNION;
6502   DirectiveKindMap["ends"] = DK_ENDS;
6503 }
6504 
isMacroLikeDirective()6505 bool MasmParser::isMacroLikeDirective() {
6506   if (getLexer().is(AsmToken::Identifier)) {
6507     bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6508                            .CasesLower("repeat", "rept", true)
6509                            .CaseLower("while", true)
6510                            .CasesLower("for", "irp", true)
6511                            .CasesLower("forc", "irpc", true)
6512                            .Default(false);
6513     if (IsMacroLike)
6514       return true;
6515   }
6516   if (getLexer().peekTok().is(AsmToken::Identifier) &&
6517       getLexer().peekTok().getIdentifier().equals_lower("macro"))
6518     return true;
6519 
6520   return false;
6521 }
6522 
parseMacroLikeBody(SMLoc DirectiveLoc)6523 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6524   AsmToken EndToken, StartToken = getTok();
6525 
6526   unsigned NestLevel = 0;
6527   while (true) {
6528     // Check whether we have reached the end of the file.
6529     if (getLexer().is(AsmToken::Eof)) {
6530       printError(DirectiveLoc, "no matching 'endm' in definition");
6531       return nullptr;
6532     }
6533 
6534     if (isMacroLikeDirective())
6535       ++NestLevel;
6536 
6537     // Otherwise, check whether we have reached the endm.
6538     if (Lexer.is(AsmToken::Identifier) &&
6539         getTok().getIdentifier().equals_lower("endm")) {
6540       if (NestLevel == 0) {
6541         EndToken = getTok();
6542         Lex();
6543         if (Lexer.isNot(AsmToken::EndOfStatement)) {
6544           printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6545           return nullptr;
6546         }
6547         break;
6548       }
6549       --NestLevel;
6550     }
6551 
6552     // Otherwise, scan till the end of the statement.
6553     eatToEndOfStatement();
6554   }
6555 
6556   const char *BodyStart = StartToken.getLoc().getPointer();
6557   const char *BodyEnd = EndToken.getLoc().getPointer();
6558   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6559 
6560   // We Are Anonymous.
6561   MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6562   return &MacroLikeBodies.back();
6563 }
6564 
expandStatement(SMLoc Loc)6565 bool MasmParser::expandStatement(SMLoc Loc) {
6566   std::string Body = parseStringTo(AsmToken::EndOfStatement);
6567   SMLoc EndLoc = getTok().getLoc();
6568 
6569   MCAsmMacroParameters Parameters;
6570   MCAsmMacroArguments Arguments;
6571   for (const auto &V : Variables) {
6572     const Variable &Var = V.getValue();
6573     if (Var.IsText) {
6574       Parameters.emplace_back();
6575       Arguments.emplace_back();
6576       MCAsmMacroParameter &P = Parameters.back();
6577       MCAsmMacroArgument &A = Arguments.back();
6578       P.Name = Var.Name;
6579       P.Required = true;
6580       A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6581     }
6582   }
6583   MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6584   MCAsmMacro M = MacroLikeBodies.back();
6585 
6586   // Expand the statement in a new buffer.
6587   SmallString<80> Buf;
6588   raw_svector_ostream OS(Buf);
6589   if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6590     return true;
6591   std::unique_ptr<MemoryBuffer> Expansion =
6592       MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6593 
6594   // Jump to the expanded statement and prime the lexer.
6595   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6596   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6597   EndStatementAtEOFStack.push_back(false);
6598   Lex();
6599   return false;
6600 }
6601 
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)6602 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6603                                           raw_svector_ostream &OS) {
6604   instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6605 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)6606 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6607                                           SMLoc ExitLoc,
6608                                           raw_svector_ostream &OS) {
6609   OS << "endm\n";
6610 
6611   std::unique_ptr<MemoryBuffer> Instantiation =
6612       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6613 
6614   // Create the macro instantiation object and add to the current macro
6615   // instantiation stack.
6616   MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6617                                                   ExitLoc, TheCondStack.size()};
6618   ActiveMacros.push_back(MI);
6619 
6620   // Jump to the macro instantiation and prime the lexer.
6621   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6622   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6623   EndStatementAtEOFStack.push_back(true);
6624   Lex();
6625 }
6626 
6627 /// parseDirectiveRepeat
6628 ///   ::= ("repeat" | "rept") count
6629 ///       body
6630 ///     endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)6631 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6632   const MCExpr *CountExpr;
6633   SMLoc CountLoc = getTok().getLoc();
6634   if (parseExpression(CountExpr))
6635     return true;
6636 
6637   int64_t Count;
6638   if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6639     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6640   }
6641 
6642   if (check(Count < 0, CountLoc, "Count is negative") ||
6643       parseToken(AsmToken::EndOfStatement,
6644                  "unexpected token in '" + Dir + "' directive"))
6645     return true;
6646 
6647   // Lex the repeat definition.
6648   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6649   if (!M)
6650     return true;
6651 
6652   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6653   // to hold the macro body with substitutions.
6654   SmallString<256> Buf;
6655   raw_svector_ostream OS(Buf);
6656   while (Count--) {
6657     if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
6658       return true;
6659   }
6660   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6661 
6662   return false;
6663 }
6664 
6665 /// parseDirectiveWhile
6666 /// ::= "while" expression
6667 ///       body
6668 ///     endm
parseDirectiveWhile(SMLoc DirectiveLoc)6669 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6670   const MCExpr *CondExpr;
6671   SMLoc CondLoc = getTok().getLoc();
6672   if (parseExpression(CondExpr))
6673     return true;
6674 
6675   // Lex the repeat definition.
6676   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6677   if (!M)
6678     return true;
6679 
6680   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6681   // to hold the macro body with substitutions.
6682   SmallString<256> Buf;
6683   raw_svector_ostream OS(Buf);
6684   int64_t Condition;
6685   if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6686     return Error(CondLoc, "expected absolute expression in 'while' directive");
6687   if (Condition) {
6688     // Instantiate the macro, then resume at this directive to recheck the
6689     // condition.
6690     if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
6691       return true;
6692     instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6693   }
6694 
6695   return false;
6696 }
6697 
6698 /// parseDirectiveFor
6699 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
6700 ///       body
6701 ///     endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)6702 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
6703   MCAsmMacroParameter Parameter;
6704   MCAsmMacroArguments A;
6705   if (check(parseIdentifier(Parameter.Name),
6706             "expected identifier in '" + Dir + "' directive"))
6707     return true;
6708 
6709   // Parse optional qualifier (default value, or "req")
6710   if (parseOptionalToken(AsmToken::Colon)) {
6711     if (parseOptionalToken(AsmToken::Equal)) {
6712       // Default value
6713       SMLoc ParamLoc;
6714 
6715       ParamLoc = Lexer.getLoc();
6716       if (parseMacroArgument(nullptr, Parameter.Value))
6717         return true;
6718     } else {
6719       SMLoc QualLoc;
6720       StringRef Qualifier;
6721 
6722       QualLoc = Lexer.getLoc();
6723       if (parseIdentifier(Qualifier))
6724         return Error(QualLoc, "missing parameter qualifier for "
6725                               "'" +
6726                                   Parameter.Name + "' in '" + Dir +
6727                                   "' directive");
6728 
6729       if (Qualifier.equals_lower("req"))
6730         Parameter.Required = true;
6731       else
6732         return Error(QualLoc,
6733                      Qualifier + " is not a valid parameter qualifier for '" +
6734                          Parameter.Name + "' in '" + Dir + "' directive");
6735     }
6736   }
6737 
6738   if (parseToken(AsmToken::Comma,
6739                  "expected comma in '" + Dir + "' directive") ||
6740       parseToken(AsmToken::Less,
6741                  "values in '" + Dir +
6742                      "' directive must be enclosed in angle brackets"))
6743     return true;
6744 
6745   while (true) {
6746     A.emplace_back();
6747     if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
6748       return addErrorSuffix(" in arguments for '" + Dir + "' directive");
6749 
6750     // If we see a comma, continue, and allow line continuation.
6751     if (!parseOptionalToken(AsmToken::Comma))
6752       break;
6753     parseOptionalToken(AsmToken::EndOfStatement);
6754   }
6755 
6756   if (parseToken(AsmToken::Greater,
6757                  "values in '" + Dir +
6758                      "' directive must be enclosed in angle brackets") ||
6759       parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
6760     return true;
6761 
6762   // Lex the for definition.
6763   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6764   if (!M)
6765     return true;
6766 
6767   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6768   // to hold the macro body with substitutions.
6769   SmallString<256> Buf;
6770   raw_svector_ostream OS(Buf);
6771 
6772   for (const MCAsmMacroArgument &Arg : A) {
6773     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
6774       return true;
6775   }
6776 
6777   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6778 
6779   return false;
6780 }
6781 
6782 /// parseDirectiveForc
6783 /// ::= ("forc" | "irpc") symbol, <string>
6784 ///       body
6785 ///     endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)6786 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
6787   MCAsmMacroParameter Parameter;
6788 
6789   std::string Argument;
6790   if (check(parseIdentifier(Parameter.Name),
6791             "expected identifier in '" + Directive + "' directive") ||
6792       parseToken(AsmToken::Comma,
6793                  "expected comma in '" + Directive + "' directive"))
6794     return true;
6795   if (parseAngleBracketString(Argument)) {
6796     // Match ml64.exe; treat all characters to end of statement as a string,
6797     // ignoring comment markers, then discard anything following a space (using
6798     // the C locale).
6799     Argument = parseStringTo(AsmToken::EndOfStatement);
6800     if (getTok().is(AsmToken::EndOfStatement))
6801       Argument += getTok().getString();
6802     size_t End = 0;
6803     for (; End < Argument.size(); ++End) {
6804       if (isSpace(Argument[End]))
6805         break;
6806     }
6807     Argument.resize(End);
6808   }
6809   if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
6810     return true;
6811 
6812   // Lex the irpc definition.
6813   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6814   if (!M)
6815     return true;
6816 
6817   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6818   // to hold the macro body with substitutions.
6819   SmallString<256> Buf;
6820   raw_svector_ostream OS(Buf);
6821 
6822   StringRef Values(Argument);
6823   for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
6824     MCAsmMacroArgument Arg;
6825     Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
6826 
6827     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
6828       return true;
6829   }
6830 
6831   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6832 
6833   return false;
6834 }
6835 
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)6836 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
6837                                       size_t Len) {
6838   const MCExpr *Value;
6839   SMLoc ExprLoc = getLexer().getLoc();
6840   if (parseExpression(Value))
6841     return true;
6842   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
6843   if (!MCE)
6844     return Error(ExprLoc, "unexpected expression in _emit");
6845   uint64_t IntValue = MCE->getValue();
6846   if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
6847     return Error(ExprLoc, "literal value out of range for directive");
6848 
6849   Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
6850   return false;
6851 }
6852 
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)6853 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
6854   const MCExpr *Value;
6855   SMLoc ExprLoc = getLexer().getLoc();
6856   if (parseExpression(Value))
6857     return true;
6858   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
6859   if (!MCE)
6860     return Error(ExprLoc, "unexpected expression in align");
6861   uint64_t IntValue = MCE->getValue();
6862   if (!isPowerOf2_64(IntValue))
6863     return Error(ExprLoc, "literal value not a power of two greater then zero");
6864 
6865   Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
6866   return false;
6867 }
6868 
parseDirectiveRadix(SMLoc DirectiveLoc)6869 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
6870   const SMLoc Loc = getLexer().getLoc();
6871   std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
6872   StringRef RadixString = StringRef(RadixStringRaw).trim();
6873   unsigned Radix;
6874   if (RadixString.getAsInteger(10, Radix)) {
6875     return Error(Loc,
6876                  "radix must be a decimal number in the range 2 to 16; was " +
6877                      RadixString);
6878   }
6879   if (Radix < 2 || Radix > 16)
6880     return Error(Loc, "radix must be in the range 2 to 16; was " +
6881                           std::to_string(Radix));
6882   getLexer().setMasmDefaultRadix(Radix);
6883   return false;
6884 }
6885 
6886 /// parseDirectiveEcho
6887 ///   ::= "echo" message
parseDirectiveEcho()6888 bool MasmParser::parseDirectiveEcho() {
6889   // We're called before the directive is parsed, to avoid triggering lexical
6890   // substitutions in the message. Assert that the next token is the directive,
6891   // then eat it without using the Parser's Lex method.
6892   assert(getTok().is(AsmToken::Identifier) &&
6893          getTok().getString().equals_lower("echo"));
6894   Lexer.Lex();
6895 
6896   std::string Message = parseStringTo(AsmToken::EndOfStatement);
6897   llvm::outs() << Message;
6898   if (!StringRef(Message).endswith("\n"))
6899     llvm::outs() << '\n';
6900   return false;
6901 }
6902 
6903 // We are comparing pointers, but the pointers are relative to a single string.
6904 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)6905 static int rewritesSort(const AsmRewrite *AsmRewriteA,
6906                         const AsmRewrite *AsmRewriteB) {
6907   if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
6908     return -1;
6909   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
6910     return 1;
6911 
6912   // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
6913   // rewrite to the same location.  Make sure the SizeDirective rewrite is
6914   // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
6915   // ensures the sort algorithm is stable.
6916   if (AsmRewritePrecedence[AsmRewriteA->Kind] >
6917       AsmRewritePrecedence[AsmRewriteB->Kind])
6918     return -1;
6919 
6920   if (AsmRewritePrecedence[AsmRewriteA->Kind] <
6921       AsmRewritePrecedence[AsmRewriteB->Kind])
6922     return 1;
6923   llvm_unreachable("Unstable rewrite sort.");
6924 }
6925 
defineMacro(StringRef Name,StringRef Value)6926 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
6927   Variable &Var = Variables[Name.lower()];
6928   if (Var.Name.empty()) {
6929     Var.Name = Name;
6930   } else if (!Var.Redefinable) {
6931     return TokError("invalid variable redefinition");
6932   }
6933   Var.Redefinable = true;
6934   Var.IsText = true;
6935   Var.TextValue = Value.str();
6936   return false;
6937 }
6938 
lookUpField(StringRef Name,AsmFieldInfo & Info) const6939 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
6940   const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
6941   const StringRef Base = BaseMember.first, Member = BaseMember.second;
6942   return lookUpField(Base, Member, Info);
6943 }
6944 
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const6945 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
6946                              AsmFieldInfo &Info) const {
6947   if (Base.empty())
6948     return true;
6949 
6950   AsmFieldInfo BaseInfo;
6951   if (Base.contains('.') && !lookUpField(Base, BaseInfo))
6952     Base = BaseInfo.Type.Name;
6953 
6954   auto StructIt = Structs.find(Base.lower());
6955   auto TypeIt = KnownType.find(Base.lower());
6956   if (TypeIt != KnownType.end()) {
6957     StructIt = Structs.find(TypeIt->second.Name.lower());
6958   }
6959   if (StructIt != Structs.end())
6960     return lookUpField(StructIt->second, Member, Info);
6961 
6962   return true;
6963 }
6964 
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const6965 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
6966                              AsmFieldInfo &Info) const {
6967   if (Member.empty()) {
6968     Info.Type.Name = Structure.Name;
6969     Info.Type.Size = Structure.Size;
6970     Info.Type.ElementSize = Structure.Size;
6971     Info.Type.Length = 1;
6972     return false;
6973   }
6974 
6975   std::pair<StringRef, StringRef> Split = Member.split('.');
6976   const StringRef FieldName = Split.first, FieldMember = Split.second;
6977 
6978   auto StructIt = Structs.find(FieldName.lower());
6979   if (StructIt != Structs.end())
6980     return lookUpField(StructIt->second, FieldMember, Info);
6981 
6982   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
6983   if (FieldIt == Structure.FieldsByName.end())
6984     return true;
6985 
6986   const FieldInfo &Field = Structure.Fields[FieldIt->second];
6987   if (FieldMember.empty()) {
6988     Info.Offset += Field.Offset;
6989     Info.Type.Size = Field.SizeOf;
6990     Info.Type.ElementSize = Field.Type;
6991     Info.Type.Length = Field.LengthOf;
6992     if (Field.Contents.FT == FT_STRUCT)
6993       Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
6994     else
6995       Info.Type.Name = "";
6996     return false;
6997   }
6998 
6999   if (Field.Contents.FT != FT_STRUCT)
7000     return true;
7001   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7002 
7003   if (lookUpField(StructInfo.Structure, FieldMember, Info))
7004     return true;
7005 
7006   Info.Offset += Field.Offset;
7007   return false;
7008 }
7009 
lookUpType(StringRef Name,AsmTypeInfo & Info) const7010 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7011   unsigned Size = StringSwitch<unsigned>(Name)
7012                       .CasesLower("byte", "db", "sbyte", 1)
7013                       .CasesLower("word", "dw", "sword", 2)
7014                       .CasesLower("dword", "dd", "sdword", 4)
7015                       .CasesLower("fword", "df", 6)
7016                       .CasesLower("qword", "dq", "sqword", 8)
7017                       .CaseLower("real4", 4)
7018                       .CaseLower("real8", 8)
7019                       .CaseLower("real10", 10)
7020                       .Default(0);
7021   if (Size) {
7022     Info.Name = Name;
7023     Info.ElementSize = Size;
7024     Info.Length = 1;
7025     Info.Size = Size;
7026     return false;
7027   }
7028 
7029   auto StructIt = Structs.find(Name.lower());
7030   if (StructIt != Structs.end()) {
7031     const StructInfo &Structure = StructIt->second;
7032     Info.Name = Name;
7033     Info.ElementSize = Structure.Size;
7034     Info.Length = 1;
7035     Info.Size = Structure.Size;
7036     return false;
7037   }
7038 
7039   return true;
7040 }
7041 
parseMSInlineAsm(void * AsmLoc,std::string & AsmString,unsigned & NumOutputs,unsigned & NumInputs,SmallVectorImpl<std::pair<void *,bool>> & OpDecls,SmallVectorImpl<std::string> & Constraints,SmallVectorImpl<std::string> & Clobbers,const MCInstrInfo * MII,const MCInstPrinter * IP,MCAsmParserSemaCallback & SI)7042 bool MasmParser::parseMSInlineAsm(
7043     void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
7044     unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7045     SmallVectorImpl<std::string> &Constraints,
7046     SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7047     const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7048   SmallVector<void *, 4> InputDecls;
7049   SmallVector<void *, 4> OutputDecls;
7050   SmallVector<bool, 4> InputDeclsAddressOf;
7051   SmallVector<bool, 4> OutputDeclsAddressOf;
7052   SmallVector<std::string, 4> InputConstraints;
7053   SmallVector<std::string, 4> OutputConstraints;
7054   SmallVector<unsigned, 4> ClobberRegs;
7055 
7056   SmallVector<AsmRewrite, 4> AsmStrRewrites;
7057 
7058   // Prime the lexer.
7059   Lex();
7060 
7061   // While we have input, parse each statement.
7062   unsigned InputIdx = 0;
7063   unsigned OutputIdx = 0;
7064   while (getLexer().isNot(AsmToken::Eof)) {
7065     // Parse curly braces marking block start/end.
7066     if (parseCurlyBlockScope(AsmStrRewrites))
7067       continue;
7068 
7069     ParseStatementInfo Info(&AsmStrRewrites);
7070     bool StatementErr = parseStatement(Info, &SI);
7071 
7072     if (StatementErr || Info.ParseError) {
7073       // Emit pending errors if any exist.
7074       printPendingErrors();
7075       return true;
7076     }
7077 
7078     // No pending error should exist here.
7079     assert(!hasPendingError() && "unexpected error from parseStatement");
7080 
7081     if (Info.Opcode == ~0U)
7082       continue;
7083 
7084     const MCInstrDesc &Desc = MII->get(Info.Opcode);
7085 
7086     // Build the list of clobbers, outputs and inputs.
7087     for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7088       MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7089 
7090       // Register operand.
7091       if (Operand.isReg() && !Operand.needAddressOf() &&
7092           !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7093         unsigned NumDefs = Desc.getNumDefs();
7094         // Clobber.
7095         if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7096           ClobberRegs.push_back(Operand.getReg());
7097         continue;
7098       }
7099 
7100       // Expr/Input or Output.
7101       StringRef SymName = Operand.getSymName();
7102       if (SymName.empty())
7103         continue;
7104 
7105       void *OpDecl = Operand.getOpDecl();
7106       if (!OpDecl)
7107         continue;
7108 
7109       StringRef Constraint = Operand.getConstraint();
7110       if (Operand.isImm()) {
7111         // Offset as immediate.
7112         if (Operand.isOffsetOfLocal())
7113           Constraint = "r";
7114         else
7115           Constraint = "i";
7116       }
7117 
7118       bool isOutput = (i == 1) && Desc.mayStore();
7119       SMLoc Start = SMLoc::getFromPointer(SymName.data());
7120       if (isOutput) {
7121         ++InputIdx;
7122         OutputDecls.push_back(OpDecl);
7123         OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7124         OutputConstraints.push_back(("=" + Constraint).str());
7125         AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7126       } else {
7127         InputDecls.push_back(OpDecl);
7128         InputDeclsAddressOf.push_back(Operand.needAddressOf());
7129         InputConstraints.push_back(Constraint.str());
7130         if (Desc.OpInfo[i - 1].isBranchTarget())
7131           AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7132         else
7133           AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7134       }
7135     }
7136 
7137     // Consider implicit defs to be clobbers.  Think of cpuid and push.
7138     ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
7139                                 Desc.getNumImplicitDefs());
7140     llvm::append_range(ClobberRegs, ImpDefs);
7141   }
7142 
7143   // Set the number of Outputs and Inputs.
7144   NumOutputs = OutputDecls.size();
7145   NumInputs = InputDecls.size();
7146 
7147   // Set the unique clobbers.
7148   array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7149   ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
7150                     ClobberRegs.end());
7151   Clobbers.assign(ClobberRegs.size(), std::string());
7152   for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7153     raw_string_ostream OS(Clobbers[I]);
7154     IP->printRegName(OS, ClobberRegs[I]);
7155   }
7156 
7157   // Merge the various outputs and inputs.  Output are expected first.
7158   if (NumOutputs || NumInputs) {
7159     unsigned NumExprs = NumOutputs + NumInputs;
7160     OpDecls.resize(NumExprs);
7161     Constraints.resize(NumExprs);
7162     for (unsigned i = 0; i < NumOutputs; ++i) {
7163       OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7164       Constraints[i] = OutputConstraints[i];
7165     }
7166     for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7167       OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7168       Constraints[j] = InputConstraints[i];
7169     }
7170   }
7171 
7172   // Build the IR assembly string.
7173   std::string AsmStringIR;
7174   raw_string_ostream OS(AsmStringIR);
7175   StringRef ASMString =
7176       SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7177   const char *AsmStart = ASMString.begin();
7178   const char *AsmEnd = ASMString.end();
7179   array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7180   for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) {
7181     const AsmRewrite &AR = *it;
7182     // Check if this has already been covered by another rewrite...
7183     if (AR.Done)
7184       continue;
7185     AsmRewriteKind Kind = AR.Kind;
7186 
7187     const char *Loc = AR.Loc.getPointer();
7188     assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7189 
7190     // Emit everything up to the immediate/expression.
7191     if (unsigned Len = Loc - AsmStart)
7192       OS << StringRef(AsmStart, Len);
7193 
7194     // Skip the original expression.
7195     if (Kind == AOK_Skip) {
7196       AsmStart = Loc + AR.Len;
7197       continue;
7198     }
7199 
7200     unsigned AdditionalSkip = 0;
7201     // Rewrite expressions in $N notation.
7202     switch (Kind) {
7203     default:
7204       break;
7205     case AOK_IntelExpr:
7206       assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7207       if (AR.IntelExp.NeedBracs)
7208         OS << "[";
7209       if (AR.IntelExp.hasBaseReg())
7210         OS << AR.IntelExp.BaseReg;
7211       if (AR.IntelExp.hasIndexReg())
7212         OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7213            << AR.IntelExp.IndexReg;
7214       if (AR.IntelExp.Scale > 1)
7215         OS << " * $$" << AR.IntelExp.Scale;
7216       if (AR.IntelExp.hasOffset()) {
7217         if (AR.IntelExp.hasRegs())
7218           OS << " + ";
7219         // Fuse this rewrite with a rewrite of the offset name, if present.
7220         StringRef OffsetName = AR.IntelExp.OffsetName;
7221         SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7222         size_t OffsetLen = OffsetName.size();
7223         auto rewrite_it = std::find_if(
7224             it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7225               return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7226                      (FusingAR.Kind == AOK_Input ||
7227                       FusingAR.Kind == AOK_CallInput);
7228             });
7229         if (rewrite_it == AsmStrRewrites.end()) {
7230           OS << "offset " << OffsetName;
7231         } else if (rewrite_it->Kind == AOK_CallInput) {
7232           OS << "${" << InputIdx++ << ":P}";
7233           rewrite_it->Done = true;
7234         } else {
7235           OS << '$' << InputIdx++;
7236           rewrite_it->Done = true;
7237         }
7238       }
7239       if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7240         OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7241       if (AR.IntelExp.NeedBracs)
7242         OS << "]";
7243       break;
7244     case AOK_Label:
7245       OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7246       break;
7247     case AOK_Input:
7248       OS << '$' << InputIdx++;
7249       break;
7250     case AOK_CallInput:
7251       OS << "${" << InputIdx++ << ":P}";
7252       break;
7253     case AOK_Output:
7254       OS << '$' << OutputIdx++;
7255       break;
7256     case AOK_SizeDirective:
7257       switch (AR.Val) {
7258       default: break;
7259       case 8:  OS << "byte ptr "; break;
7260       case 16: OS << "word ptr "; break;
7261       case 32: OS << "dword ptr "; break;
7262       case 64: OS << "qword ptr "; break;
7263       case 80: OS << "xword ptr "; break;
7264       case 128: OS << "xmmword ptr "; break;
7265       case 256: OS << "ymmword ptr "; break;
7266       }
7267       break;
7268     case AOK_Emit:
7269       OS << ".byte";
7270       break;
7271     case AOK_Align: {
7272       // MS alignment directives are measured in bytes. If the native assembler
7273       // measures alignment in bytes, we can pass it straight through.
7274       OS << ".align";
7275       if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7276         break;
7277 
7278       // Alignment is in log2 form, so print that instead and skip the original
7279       // immediate.
7280       unsigned Val = AR.Val;
7281       OS << ' ' << Val;
7282       assert(Val < 10 && "Expected alignment less then 2^10.");
7283       AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7284       break;
7285     }
7286     case AOK_EVEN:
7287       OS << ".even";
7288       break;
7289     case AOK_EndOfStatement:
7290       OS << "\n\t";
7291       break;
7292     }
7293 
7294     // Skip the original expression.
7295     AsmStart = Loc + AR.Len + AdditionalSkip;
7296   }
7297 
7298   // Emit the remainder of the asm string.
7299   if (AsmStart != AsmEnd)
7300     OS << StringRef(AsmStart, AsmEnd - AsmStart);
7301 
7302   AsmString = OS.str();
7303   return false;
7304 }
7305 
7306 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,unsigned CB)7307 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7308                                       MCStreamer &Out, const MCAsmInfo &MAI,
7309                                       unsigned CB) {
7310   return new MasmParser(SM, C, Out, MAI, CB);
7311 }
7312