1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
10 #define LLVM_MC_MCPARSER_MCASMLEXER_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/MC/MCAsmMacro.h"
15 #include <algorithm>
16 #include <cassert>
17 #include <cstddef>
18 #include <cstdint>
19 #include <string>
20 
21 namespace llvm {
22 
23 /// A callback class which is notified of each comment in an assembly file as
24 /// it is lexed.
25 class AsmCommentConsumer {
26 public:
27   virtual ~AsmCommentConsumer() = default;
28 
29   /// Callback function for when a comment is lexed. Loc is the start of the
30   /// comment text (excluding the comment-start marker). CommentText is the text
31   /// of the comment, excluding the comment start and end markers, and the
32   /// newline for single-line comments.
33   virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
34 };
35 
36 
37 /// Generic assembler lexer interface, for use by target specific assembly
38 /// lexers.
39 class MCAsmLexer {
40   /// The current token, stored in the base class for faster access.
41   SmallVector<AsmToken, 1> CurTok;
42 
43   /// The location and description of the current error
44   SMLoc ErrLoc;
45   std::string Err;
46 
47 protected: // Can only create subclasses.
48   const char *TokStart = nullptr;
49   bool SkipSpace = true;
50   bool AllowAtInIdentifier;
51   bool IsAtStartOfStatement = true;
52   bool LexMasmHexFloats = false;
53   bool LexMasmIntegers = false;
54   bool LexMasmStrings = false;
55   bool UseMasmDefaultRadix = false;
56   unsigned DefaultRadix = 10;
57   AsmCommentConsumer *CommentConsumer = nullptr;
58 
59   MCAsmLexer();
60 
61   virtual AsmToken LexToken() = 0;
62 
63   void SetError(SMLoc errLoc, const std::string &err) {
64     ErrLoc = errLoc;
65     Err = err;
66   }
67 
68 public:
69   MCAsmLexer(const MCAsmLexer &) = delete;
70   MCAsmLexer &operator=(const MCAsmLexer &) = delete;
71   virtual ~MCAsmLexer();
72 
73   /// Consume the next token from the input stream and return it.
74   ///
75   /// The lexer will continuously return the end-of-file token once the end of
76   /// the main input file has been reached.
77   const AsmToken &Lex() {
78     assert(!CurTok.empty());
79     // Mark if we parsing out a EndOfStatement.
80     IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
81     CurTok.erase(CurTok.begin());
82     // LexToken may generate multiple tokens via UnLex but will always return
83     // the first one. Place returned value at head of CurTok vector.
84     if (CurTok.empty()) {
85       AsmToken T = LexToken();
86       CurTok.insert(CurTok.begin(), T);
87     }
88     return CurTok.front();
89   }
90 
91   void UnLex(AsmToken const &Token) {
92     IsAtStartOfStatement = false;
93     CurTok.insert(CurTok.begin(), Token);
94   }
95 
96   bool isAtStartOfStatement() { return IsAtStartOfStatement; }
97 
98   virtual StringRef LexUntilEndOfStatement() = 0;
99 
100   /// Get the current source location.
101   SMLoc getLoc() const;
102 
103   /// Get the current (last) lexed token.
104   const AsmToken &getTok() const {
105     return CurTok[0];
106   }
107 
108   /// Look ahead at the next token to be lexed.
109   const AsmToken peekTok(bool ShouldSkipSpace = true) {
110     AsmToken Tok;
111 
112     MutableArrayRef<AsmToken> Buf(Tok);
113     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
114 
115     assert(ReadCount == 1);
116     (void)ReadCount;
117 
118     return Tok;
119   }
120 
121   /// Look ahead an arbitrary number of tokens.
122   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
123                             bool ShouldSkipSpace = true) = 0;
124 
125   /// Get the current error location
126   SMLoc getErrLoc() {
127     return ErrLoc;
128   }
129 
130   /// Get the current error string
131   const std::string &getErr() {
132     return Err;
133   }
134 
135   /// Get the kind of current token.
136   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
137 
138   /// Check if the current token has kind \p K.
139   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
140 
141   /// Check if the current token has kind \p K.
142   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
143 
144   /// Set whether spaces should be ignored by the lexer
145   void setSkipSpace(bool val) { SkipSpace = val; }
146 
147   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
148   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
149 
150   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
151     this->CommentConsumer = CommentConsumer;
152   }
153 
154   /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
155   /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
156   void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
157 
158   /// Set whether to use masm-style default-radix integer literals. If disabled,
159   /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
160   void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
161 
162   unsigned getMasmDefaultRadix() const { return DefaultRadix; }
163   void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
164 
165   /// Set whether to lex masm-style hex float literals, such as 3f800000r.
166   void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
167 
168   /// Set whether to lex masm-style string literals, such as 'Can''t find file'
169   /// and "This ""value"" not found".
170   void setLexMasmStrings(bool V) { LexMasmStrings = V; }
171 };
172 
173 } // end namespace llvm
174 
175 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H
176