1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11 #define LLVM_MC_MCPARSER_MCASMLEXER_H 12 13 #include "llvm/ADT/APInt.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/Compiler.h" 16 #include "llvm/Support/DataTypes.h" 17 #include "llvm/Support/SMLoc.h" 18 19 namespace llvm { 20 21 /// Target independent representation for an assembler token. 22 class AsmToken { 23 public: 24 enum TokenKind { 25 // Markers 26 Eof, Error, 27 28 // String values. 29 Identifier, 30 String, 31 32 // Integer values. 33 Integer, 34 BigNum, // larger than 64 bits 35 36 // Real values. 37 Real, 38 39 // No-value. 40 EndOfStatement, 41 Colon, 42 Space, 43 Plus, Minus, Tilde, 44 Slash, // '/' 45 BackSlash, // '\' 46 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 47 Star, Dot, Comma, Dollar, Equal, EqualEqual, 48 49 Pipe, PipePipe, Caret, 50 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 51 Less, LessEqual, LessLess, LessGreater, 52 Greater, GreaterEqual, GreaterGreater, At 53 }; 54 55 private: 56 TokenKind Kind; 57 58 /// A reference to the entire token contents; this is always a pointer into 59 /// a memory buffer owned by the source manager. 60 StringRef Str; 61 62 APInt IntVal; 63 64 public: AsmToken()65 AsmToken() {} AsmToken(TokenKind Kind,StringRef Str,APInt IntVal)66 AsmToken(TokenKind Kind, StringRef Str, APInt IntVal) 67 : Kind(Kind), Str(Str), IntVal(IntVal) {} 68 AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0) Kind(Kind)69 : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} 70 getKind()71 TokenKind getKind() const { return Kind; } is(TokenKind K)72 bool is(TokenKind K) const { return Kind == K; } isNot(TokenKind K)73 bool isNot(TokenKind K) const { return Kind != K; } 74 75 SMLoc getLoc() const; 76 SMLoc getEndLoc() const; 77 SMRange getLocRange() const; 78 79 /// Get the contents of a string token (without quotes). getStringContents(bool & valid)80 StringRef getStringContents(bool &valid) const { 81 //assert(Kind == String && "This token isn't a string!"); 82 if (Kind != String) { 83 valid = false; 84 return nullptr; 85 } 86 valid = true; 87 return Str.slice(1, Str.size() - 1); 88 } 89 90 /// Get the identifier string for the current token, which should be an 91 /// identifier or a string. This gets the portion of the string which should 92 /// be used as the identifier, e.g., it does not include the quotes on 93 /// strings. getIdentifier()94 StringRef getIdentifier() const { 95 if (Kind == Identifier) 96 return getString(); 97 bool valid; 98 return getStringContents(valid); 99 } 100 101 /// Get the string for the current token, this includes all characters (for 102 /// example, the quotes on strings) in the token. 103 /// 104 /// The returned StringRef points into the source manager's memory buffer, and 105 /// is safe to store across calls to Lex(). getString()106 StringRef getString() const { return Str; } 107 108 // FIXME: Don't compute this in advance, it makes every token larger, and is 109 // also not generally what we want (it is nicer for recovery etc. to lex 123br 110 // as a single token, then diagnose as an invalid number). getIntVal(bool & valid)111 int64_t getIntVal(bool &valid) const { 112 //assert(Kind == Integer && "This token isn't an integer!"); 113 if (Kind != Integer) { 114 valid = false; 115 return -1; 116 } 117 valid = true; 118 return IntVal.getZExtValue(); 119 } 120 getAPIntVal(bool & valid)121 APInt getAPIntVal(bool &valid) const { 122 //assert((Kind == Integer || Kind == BigNum) && 123 // "This token isn't an integer!"); 124 if (Kind != Integer && Kind != BigNum) { 125 valid = false; 126 //return APInt(-1); 127 } 128 valid = true; 129 return IntVal; 130 } 131 }; 132 133 /// Generic assembler lexer interface, for use by target specific assembly 134 /// lexers. 135 class MCAsmLexer { 136 /// The current token, stored in the base class for faster access. 137 SmallVector<AsmToken, 1> CurTok; 138 139 /// The location and description of the current error 140 SMLoc ErrLoc; 141 std::string Err; 142 143 MCAsmLexer(const MCAsmLexer &) = delete; 144 void operator=(const MCAsmLexer &) = delete; 145 protected: // Can only create subclasses. 146 const char *TokStart; 147 bool SkipSpace; 148 bool AllowAtInIdentifier; 149 150 MCAsmLexer(); 151 152 virtual AsmToken LexToken() = 0; 153 SetError(SMLoc errLoc,const std::string & err)154 void SetError(SMLoc errLoc, const std::string &err) { 155 ErrLoc = errLoc; 156 Err = err; 157 } 158 159 public: 160 virtual ~MCAsmLexer(); 161 162 /// Consume the next token from the input stream and return it. 163 /// 164 /// The lexer will continuosly return the end-of-file token once the end of 165 /// the main input file has been reached. Lex()166 const AsmToken &Lex() { 167 assert(!CurTok.empty()); 168 CurTok.erase(CurTok.begin()); 169 if (CurTok.empty()) 170 CurTok.emplace_back(LexToken()); 171 return CurTok.front(); 172 } 173 UnLex(AsmToken const & Token)174 void UnLex(AsmToken const &Token) { 175 CurTok.insert(CurTok.begin(), Token); 176 } 177 178 virtual StringRef LexUntilEndOfStatement() = 0; 179 180 /// Get the current source location. 181 SMLoc getLoc() const; 182 183 /// Get the current (last) lexed token. getTok()184 const AsmToken &getTok() const { 185 return CurTok[0]; 186 } 187 188 /// Look ahead at the next token to be lexed. 189 const AsmToken peekTok(bool ShouldSkipSpace = true) { 190 AsmToken Tok; 191 192 MutableArrayRef<AsmToken> Buf(Tok); 193 if (peekTokens(Buf, ShouldSkipSpace) != 1) 194 return AsmToken(AsmToken::Error, nullptr); 195 196 return Tok; 197 } 198 199 /// Look ahead an arbitrary number of tokens. 200 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 201 bool ShouldSkipSpace = true) = 0; 202 203 /// Get the current error location getErrLoc()204 SMLoc getErrLoc() { 205 return ErrLoc; 206 } 207 208 /// Get the current error string getErr()209 const std::string &getErr() { 210 return Err; 211 } 212 213 /// Get the kind of current token. getKind()214 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 215 216 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)217 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 218 219 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)220 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 221 222 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)223 void setSkipSpace(bool val) { SkipSpace = val; } 224 getAllowAtInIdentifier()225 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)226 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 227 }; 228 229 } // End llvm namespace 230 231 #endif 232