1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Compiler.h"
16 #include "llvm/Support/DataTypes.h"
17 #include "llvm/Support/SMLoc.h"
18 
19 namespace llvm {
20 
21 /// Target independent representation for an assembler token.
22 class AsmToken {
23 public:
24   enum TokenKind {
25     // Markers
26     Eof, Error,
27 
28     // String values.
29     Identifier,
30     String,
31 
32     // Integer values.
33     Integer,
34     BigNum, // larger than 64 bits
35 
36     // Real values.
37     Real,
38 
39     // No-value.
40     EndOfStatement,
41     Colon,
42     Space,
43     Plus, Minus, Tilde,
44     Slash,    // '/'
45     BackSlash, // '\'
46     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
47     Star, Dot, Comma, Dollar, Equal, EqualEqual,
48 
49     Pipe, PipePipe, Caret,
50     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
51     Less, LessEqual, LessLess, LessGreater,
52     Greater, GreaterEqual, GreaterGreater, At
53   };
54 
55 private:
56   TokenKind Kind;
57 
58   /// A reference to the entire token contents; this is always a pointer into
59   /// a memory buffer owned by the source manager.
60   StringRef Str;
61 
62   APInt IntVal;
63 
64 public:
AsmToken()65   AsmToken() {}
AsmToken(TokenKind Kind,StringRef Str,APInt IntVal)66   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
67       : Kind(Kind), Str(Str), IntVal(IntVal) {}
68   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
Kind(Kind)69       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
70 
getKind()71   TokenKind getKind() const { return Kind; }
is(TokenKind K)72   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)73   bool isNot(TokenKind K) const { return Kind != K; }
74 
75   SMLoc getLoc() const;
76   SMLoc getEndLoc() const;
77   SMRange getLocRange() const;
78 
79   /// Get the contents of a string token (without quotes).
getStringContents(bool & valid)80   StringRef getStringContents(bool &valid) const {
81     //assert(Kind == String && "This token isn't a string!");
82     if (Kind != String) {
83         valid = false;
84         return nullptr;
85     }
86     valid = true;
87     return Str.slice(1, Str.size() - 1);
88   }
89 
90   /// Get the identifier string for the current token, which should be an
91   /// identifier or a string. This gets the portion of the string which should
92   /// be used as the identifier, e.g., it does not include the quotes on
93   /// strings.
getIdentifier()94   StringRef getIdentifier() const {
95     if (Kind == Identifier)
96       return getString();
97     bool valid;
98     return getStringContents(valid);
99   }
100 
101   /// Get the string for the current token, this includes all characters (for
102   /// example, the quotes on strings) in the token.
103   ///
104   /// The returned StringRef points into the source manager's memory buffer, and
105   /// is safe to store across calls to Lex().
getString()106   StringRef getString() const { return Str; }
107 
108   // FIXME: Don't compute this in advance, it makes every token larger, and is
109   // also not generally what we want (it is nicer for recovery etc. to lex 123br
110   // as a single token, then diagnose as an invalid number).
getIntVal(bool & valid)111   int64_t getIntVal(bool &valid) const {
112     //assert(Kind == Integer && "This token isn't an integer!");
113     if (Kind != Integer) {
114         valid = false;
115         return -1;
116     }
117     valid = true;
118     return IntVal.getZExtValue();
119   }
120 
getAPIntVal(bool & valid)121   APInt getAPIntVal(bool &valid) const {
122     //assert((Kind == Integer || Kind == BigNum) &&
123     //       "This token isn't an integer!");
124     if (Kind != Integer && Kind != BigNum) {
125         valid = false;
126         //return APInt(-1);
127     }
128     valid = true;
129     return IntVal;
130   }
131 };
132 
133 /// Generic assembler lexer interface, for use by target specific assembly
134 /// lexers.
135 class MCAsmLexer {
136   /// The current token, stored in the base class for faster access.
137   SmallVector<AsmToken, 1> CurTok;
138 
139   /// The location and description of the current error
140   SMLoc ErrLoc;
141   std::string Err;
142 
143   MCAsmLexer(const MCAsmLexer &) = delete;
144   void operator=(const MCAsmLexer &) = delete;
145 protected: // Can only create subclasses.
146   const char *TokStart;
147   bool SkipSpace;
148   bool AllowAtInIdentifier;
149 
150   MCAsmLexer();
151 
152   virtual AsmToken LexToken() = 0;
153 
SetError(SMLoc errLoc,const std::string & err)154   void SetError(SMLoc errLoc, const std::string &err) {
155     ErrLoc = errLoc;
156     Err = err;
157   }
158 
159 public:
160   virtual ~MCAsmLexer();
161 
162   /// Consume the next token from the input stream and return it.
163   ///
164   /// The lexer will continuosly return the end-of-file token once the end of
165   /// the main input file has been reached.
Lex()166   const AsmToken &Lex() {
167     assert(!CurTok.empty());
168     CurTok.erase(CurTok.begin());
169     if (CurTok.empty())
170       CurTok.emplace_back(LexToken());
171     return CurTok.front();
172   }
173 
UnLex(AsmToken const & Token)174   void UnLex(AsmToken const &Token) {
175     CurTok.insert(CurTok.begin(), Token);
176   }
177 
178   virtual StringRef LexUntilEndOfStatement() = 0;
179 
180   /// Get the current source location.
181   SMLoc getLoc() const;
182 
183   /// Get the current (last) lexed token.
getTok()184   const AsmToken &getTok() const {
185     return CurTok[0];
186   }
187 
188   /// Look ahead at the next token to be lexed.
189   const AsmToken peekTok(bool ShouldSkipSpace = true) {
190     AsmToken Tok;
191 
192     MutableArrayRef<AsmToken> Buf(Tok);
193     if (peekTokens(Buf, ShouldSkipSpace) != 1)
194         return AsmToken(AsmToken::Error, nullptr);
195 
196     return Tok;
197   }
198 
199   /// Look ahead an arbitrary number of tokens.
200   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
201                             bool ShouldSkipSpace = true) = 0;
202 
203   /// Get the current error location
getErrLoc()204   SMLoc getErrLoc() {
205     return ErrLoc;
206   }
207 
208   /// Get the current error string
getErr()209   const std::string &getErr() {
210     return Err;
211   }
212 
213   /// Get the kind of current token.
getKind()214   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
215 
216   /// Check if the current token has kind \p K.
is(AsmToken::TokenKind K)217   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
218 
219   /// Check if the current token has kind \p K.
isNot(AsmToken::TokenKind K)220   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
221 
222   /// Set whether spaces should be ignored by the lexer
setSkipSpace(bool val)223   void setSkipSpace(bool val) { SkipSpace = val; }
224 
getAllowAtInIdentifier()225   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
setAllowAtInIdentifier(bool v)226   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
227 };
228 
229 } // End llvm namespace
230 
231 #endif
232