1f4a2713aSLionel Sambuc //===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===// 2f4a2713aSLionel Sambuc // 3f4a2713aSLionel Sambuc // The LLVM Compiler Infrastructure 4f4a2713aSLionel Sambuc // 5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source 6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details. 7f4a2713aSLionel Sambuc // 8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===// 9f4a2713aSLionel Sambuc // 10f4a2713aSLionel Sambuc // This file defines the Lexer interface. 11f4a2713aSLionel Sambuc // 12f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===// 13f4a2713aSLionel Sambuc 14*0a6a1f1dSLionel Sambuc #ifndef LLVM_CLANG_LEX_LEXER_H 15*0a6a1f1dSLionel Sambuc #define LLVM_CLANG_LEX_LEXER_H 16f4a2713aSLionel Sambuc 17f4a2713aSLionel Sambuc #include "clang/Basic/LangOptions.h" 18f4a2713aSLionel Sambuc #include "clang/Lex/PreprocessorLexer.h" 19f4a2713aSLionel Sambuc #include "llvm/ADT/SmallVector.h" 20f4a2713aSLionel Sambuc #include <cassert> 21f4a2713aSLionel Sambuc #include <string> 22f4a2713aSLionel Sambuc 23f4a2713aSLionel Sambuc namespace clang { 24f4a2713aSLionel Sambuc class DiagnosticsEngine; 25f4a2713aSLionel Sambuc class SourceManager; 26f4a2713aSLionel Sambuc class Preprocessor; 27f4a2713aSLionel Sambuc class DiagnosticBuilder; 28f4a2713aSLionel Sambuc 29f4a2713aSLionel Sambuc /// ConflictMarkerKind - Kinds of conflict marker which the lexer might be 30f4a2713aSLionel Sambuc /// recovering from. 31f4a2713aSLionel Sambuc enum ConflictMarkerKind { 32f4a2713aSLionel Sambuc /// Not within a conflict marker. 33f4a2713aSLionel Sambuc CMK_None, 34f4a2713aSLionel Sambuc /// A normal or diff3 conflict marker, initiated by at least 7 "<"s, 35f4a2713aSLionel Sambuc /// separated by at least 7 "="s or "|"s, and terminated by at least 7 ">"s. 
36f4a2713aSLionel Sambuc CMK_Normal, 37f4a2713aSLionel Sambuc /// A Perforce-style conflict marker, initiated by 4 ">"s, 38f4a2713aSLionel Sambuc /// separated by 4 "="s, and terminated by 4 "<"s. 39f4a2713aSLionel Sambuc CMK_Perforce 40f4a2713aSLionel Sambuc }; 41f4a2713aSLionel Sambuc 42f4a2713aSLionel Sambuc /// Lexer - This provides a simple interface that turns a text buffer into a 43f4a2713aSLionel Sambuc /// stream of tokens. This provides no support for file reading or buffering, 44f4a2713aSLionel Sambuc /// or buffering/seeking of tokens, only forward lexing is supported. It relies 45f4a2713aSLionel Sambuc /// on the specified Preprocessor object to handle preprocessor directives, etc. 46f4a2713aSLionel Sambuc class Lexer : public PreprocessorLexer { 47*0a6a1f1dSLionel Sambuc void anchor() override; 48f4a2713aSLionel Sambuc 49f4a2713aSLionel Sambuc //===--------------------------------------------------------------------===// 50f4a2713aSLionel Sambuc // Constant configuration values for this lexer. 51f4a2713aSLionel Sambuc const char *BufferStart; // Start of the buffer. 52f4a2713aSLionel Sambuc const char *BufferEnd; // End of the buffer. 53f4a2713aSLionel Sambuc SourceLocation FileLoc; // Location for start of file. 54f4a2713aSLionel Sambuc LangOptions LangOpts; // LangOpts enabled by this language (cache). 55f4a2713aSLionel Sambuc bool Is_PragmaLexer; // True if lexer for _Pragma handling. 56f4a2713aSLionel Sambuc 57f4a2713aSLionel Sambuc //===--------------------------------------------------------------------===// 58f4a2713aSLionel Sambuc // Context-specific lexing flags set by the preprocessor. 59f4a2713aSLionel Sambuc // 60f4a2713aSLionel Sambuc 61f4a2713aSLionel Sambuc /// ExtendedTokenMode - The lexer can optionally keep comments and whitespace 62f4a2713aSLionel Sambuc /// and return them as tokens. 
/// This is used for -C and -CC modes, and
  /// whitespace preservation can be useful for some clients that want to lex
  /// the file in raw mode and get every character from the file.
  ///
  /// When this is set to 2 it returns comments and whitespace.  When set to 1
  /// it returns comments, when it is set to 0 it returns normal tokens only.
  unsigned char ExtendedTokenMode;

  //===--------------------------------------------------------------------===//
  // Context that changes as the file is lexed.
  // NOTE: any state that mutates when in raw mode must have save/restore code
  // in Lexer::isNextPPTokenLParen.

  // BufferPtr - Current pointer into the buffer.  This is the next character
  // to be lexed.
  const char *BufferPtr;

  // IsAtStartOfLine - True if the next lexed token should get the "start of
  // line" flag set on it.
  bool IsAtStartOfLine;

  bool IsAtPhysicalStartOfLine;

  bool HasLeadingSpace;

  bool HasLeadingEmptyMacro;

  // CurrentConflictMarkerState - The kind of conflict marker we are handling.
90f4a2713aSLionel Sambuc ConflictMarkerKind CurrentConflictMarkerState; 91f4a2713aSLionel Sambuc 92f4a2713aSLionel Sambuc Lexer(const Lexer &) LLVM_DELETED_FUNCTION; 93f4a2713aSLionel Sambuc void operator=(const Lexer &) LLVM_DELETED_FUNCTION; 94f4a2713aSLionel Sambuc friend class Preprocessor; 95f4a2713aSLionel Sambuc 96f4a2713aSLionel Sambuc void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd); 97f4a2713aSLionel Sambuc public: 98f4a2713aSLionel Sambuc 99f4a2713aSLionel Sambuc /// Lexer constructor - Create a new lexer object for the specified buffer 100f4a2713aSLionel Sambuc /// with the specified preprocessor managing the lexing process. This lexer 101f4a2713aSLionel Sambuc /// assumes that the associated file buffer and Preprocessor objects will 102f4a2713aSLionel Sambuc /// outlive it, so it doesn't take ownership of either of them. 103f4a2713aSLionel Sambuc Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer, Preprocessor &PP); 104f4a2713aSLionel Sambuc 105f4a2713aSLionel Sambuc /// Lexer constructor - Create a new raw lexer object. This object is only 106f4a2713aSLionel Sambuc /// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the 107f4a2713aSLionel Sambuc /// text range will outlive it, so it doesn't take ownership of it. 108f4a2713aSLionel Sambuc Lexer(SourceLocation FileLoc, const LangOptions &LangOpts, 109f4a2713aSLionel Sambuc const char *BufStart, const char *BufPtr, const char *BufEnd); 110f4a2713aSLionel Sambuc 111f4a2713aSLionel Sambuc /// Lexer constructor - Create a new raw lexer object. This object is only 112f4a2713aSLionel Sambuc /// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the 113f4a2713aSLionel Sambuc /// text range will outlive it, so it doesn't take ownership of it. 
114f4a2713aSLionel Sambuc Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer, 115f4a2713aSLionel Sambuc const SourceManager &SM, const LangOptions &LangOpts); 116f4a2713aSLionel Sambuc 117f4a2713aSLionel Sambuc /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for 118f4a2713aSLionel Sambuc /// _Pragma expansion. This has a variety of magic semantics that this method 119f4a2713aSLionel Sambuc /// sets up. It returns a new'd Lexer that must be delete'd when done. 120f4a2713aSLionel Sambuc static Lexer *Create_PragmaLexer(SourceLocation SpellingLoc, 121f4a2713aSLionel Sambuc SourceLocation ExpansionLocStart, 122f4a2713aSLionel Sambuc SourceLocation ExpansionLocEnd, 123f4a2713aSLionel Sambuc unsigned TokLen, Preprocessor &PP); 124f4a2713aSLionel Sambuc 125f4a2713aSLionel Sambuc 126f4a2713aSLionel Sambuc /// getLangOpts - Return the language features currently enabled. 127f4a2713aSLionel Sambuc /// NOTE: this lexer modifies features as a file is parsed! getLangOpts()128f4a2713aSLionel Sambuc const LangOptions &getLangOpts() const { return LangOpts; } 129f4a2713aSLionel Sambuc 130f4a2713aSLionel Sambuc /// getFileLoc - Return the File Location for the file we are lexing out of. 131f4a2713aSLionel Sambuc /// The physical location encodes the location where the characters come from, 132f4a2713aSLionel Sambuc /// the virtual location encodes where we should *claim* the characters came 133f4a2713aSLionel Sambuc /// from. Currently this is only used by _Pragma handling. getFileLoc()134f4a2713aSLionel Sambuc SourceLocation getFileLoc() const { return FileLoc; } 135f4a2713aSLionel Sambuc 136f4a2713aSLionel Sambuc private: 137f4a2713aSLionel Sambuc /// Lex - Return the next token in the file. If this is the end of file, it 138f4a2713aSLionel Sambuc /// return the tok::eof token. This implicitly involves the preprocessor. 
139f4a2713aSLionel Sambuc bool Lex(Token &Result); 140f4a2713aSLionel Sambuc 141f4a2713aSLionel Sambuc public: 142f4a2713aSLionel Sambuc /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma. isPragmaLexer()143f4a2713aSLionel Sambuc bool isPragmaLexer() const { return Is_PragmaLexer; } 144f4a2713aSLionel Sambuc 145f4a2713aSLionel Sambuc private: 146f4a2713aSLionel Sambuc /// IndirectLex - An indirect call to 'Lex' that can be invoked via 147f4a2713aSLionel Sambuc /// the PreprocessorLexer interface. IndirectLex(Token & Result)148*0a6a1f1dSLionel Sambuc void IndirectLex(Token &Result) override { Lex(Result); } 149f4a2713aSLionel Sambuc 150f4a2713aSLionel Sambuc public: 151f4a2713aSLionel Sambuc /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no 152f4a2713aSLionel Sambuc /// associated preprocessor object. Return true if the 'next character to 153f4a2713aSLionel Sambuc /// read' pointer points at the end of the lexer buffer, false otherwise. LexFromRawLexer(Token & Result)154f4a2713aSLionel Sambuc bool LexFromRawLexer(Token &Result) { 155f4a2713aSLionel Sambuc assert(LexingRawMode && "Not already in raw mode!"); 156f4a2713aSLionel Sambuc Lex(Result); 157f4a2713aSLionel Sambuc // Note that lexing to the end of the buffer doesn't implicitly delete the 158f4a2713aSLionel Sambuc // lexer when in raw mode. 159f4a2713aSLionel Sambuc return BufferPtr == BufferEnd; 160f4a2713aSLionel Sambuc } 161f4a2713aSLionel Sambuc 162f4a2713aSLionel Sambuc /// isKeepWhitespaceMode - Return true if the lexer should return tokens for 163f4a2713aSLionel Sambuc /// every character in the file, including whitespace and comments. This 164f4a2713aSLionel Sambuc /// should only be used in raw mode, as the preprocessor is not prepared to 165f4a2713aSLionel Sambuc /// deal with the excess tokens. 
isKeepWhitespaceMode()166f4a2713aSLionel Sambuc bool isKeepWhitespaceMode() const { 167f4a2713aSLionel Sambuc return ExtendedTokenMode > 1; 168f4a2713aSLionel Sambuc } 169f4a2713aSLionel Sambuc 170f4a2713aSLionel Sambuc /// SetKeepWhitespaceMode - This method lets clients enable or disable 171f4a2713aSLionel Sambuc /// whitespace retention mode. SetKeepWhitespaceMode(bool Val)172f4a2713aSLionel Sambuc void SetKeepWhitespaceMode(bool Val) { 173f4a2713aSLionel Sambuc assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) && 174f4a2713aSLionel Sambuc "Can only retain whitespace in raw mode or -traditional-cpp"); 175f4a2713aSLionel Sambuc ExtendedTokenMode = Val ? 2 : 0; 176f4a2713aSLionel Sambuc } 177f4a2713aSLionel Sambuc 178f4a2713aSLionel Sambuc /// inKeepCommentMode - Return true if the lexer should return comments as 179f4a2713aSLionel Sambuc /// tokens. inKeepCommentMode()180f4a2713aSLionel Sambuc bool inKeepCommentMode() const { 181f4a2713aSLionel Sambuc return ExtendedTokenMode > 0; 182f4a2713aSLionel Sambuc } 183f4a2713aSLionel Sambuc 184f4a2713aSLionel Sambuc /// SetCommentRetentionMode - Change the comment retention mode of the lexer 185f4a2713aSLionel Sambuc /// to the specified mode. This is really only useful when lexing in raw 186f4a2713aSLionel Sambuc /// mode, because otherwise the lexer needs to manage this. SetCommentRetentionState(bool Mode)187f4a2713aSLionel Sambuc void SetCommentRetentionState(bool Mode) { 188f4a2713aSLionel Sambuc assert(!isKeepWhitespaceMode() && 189f4a2713aSLionel Sambuc "Can't play with comment retention state when retaining whitespace"); 190f4a2713aSLionel Sambuc ExtendedTokenMode = Mode ? 1 : 0; 191f4a2713aSLionel Sambuc } 192f4a2713aSLionel Sambuc 193f4a2713aSLionel Sambuc /// Sets the extended token mode back to its initial value, according to the 194f4a2713aSLionel Sambuc /// language options and preprocessor. This controls whether the lexer 195f4a2713aSLionel Sambuc /// produces comment and whitespace tokens. 
196f4a2713aSLionel Sambuc /// 197f4a2713aSLionel Sambuc /// This requires the lexer to have an associated preprocessor. A standalone 198f4a2713aSLionel Sambuc /// lexer has nothing to reset to. 199f4a2713aSLionel Sambuc void resetExtendedTokenMode(); 200f4a2713aSLionel Sambuc 201f4a2713aSLionel Sambuc /// Gets source code buffer. getBuffer()202f4a2713aSLionel Sambuc StringRef getBuffer() const { 203f4a2713aSLionel Sambuc return StringRef(BufferStart, BufferEnd - BufferStart); 204f4a2713aSLionel Sambuc } 205f4a2713aSLionel Sambuc 206f4a2713aSLionel Sambuc /// ReadToEndOfLine - Read the rest of the current preprocessor line as an 207f4a2713aSLionel Sambuc /// uninterpreted string. This switches the lexer out of directive mode. 208*0a6a1f1dSLionel Sambuc void ReadToEndOfLine(SmallVectorImpl<char> *Result = nullptr); 209f4a2713aSLionel Sambuc 210f4a2713aSLionel Sambuc 211f4a2713aSLionel Sambuc /// Diag - Forwarding function for diagnostics. This translate a source 212f4a2713aSLionel Sambuc /// position in the current buffer into a SourceLocation object for rendering. 213f4a2713aSLionel Sambuc DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const; 214f4a2713aSLionel Sambuc 215f4a2713aSLionel Sambuc /// getSourceLocation - Return a source location identifier for the specified 216f4a2713aSLionel Sambuc /// offset in the current file. 217f4a2713aSLionel Sambuc SourceLocation getSourceLocation(const char *Loc, unsigned TokLen = 1) const; 218f4a2713aSLionel Sambuc 219f4a2713aSLionel Sambuc /// getSourceLocation - Return a source location for the next character in 220f4a2713aSLionel Sambuc /// the current file. getSourceLocation()221*0a6a1f1dSLionel Sambuc SourceLocation getSourceLocation() override { 222*0a6a1f1dSLionel Sambuc return getSourceLocation(BufferPtr); 223*0a6a1f1dSLionel Sambuc } 224f4a2713aSLionel Sambuc 225f4a2713aSLionel Sambuc /// \brief Return the current location in the buffer. 
getBufferLocation()226f4a2713aSLionel Sambuc const char *getBufferLocation() const { return BufferPtr; } 227f4a2713aSLionel Sambuc 228f4a2713aSLionel Sambuc /// Stringify - Convert the specified string into a C string by escaping '\' 229f4a2713aSLionel Sambuc /// and " characters. This does not add surrounding ""'s to the string. 230f4a2713aSLionel Sambuc /// If Charify is true, this escapes the ' character instead of ". 231f4a2713aSLionel Sambuc static std::string Stringify(const std::string &Str, bool Charify = false); 232f4a2713aSLionel Sambuc 233f4a2713aSLionel Sambuc /// Stringify - Convert the specified string into a C string by escaping '\' 234f4a2713aSLionel Sambuc /// and " characters. This does not add surrounding ""'s to the string. 235f4a2713aSLionel Sambuc static void Stringify(SmallVectorImpl<char> &Str); 236f4a2713aSLionel Sambuc 237f4a2713aSLionel Sambuc 238f4a2713aSLionel Sambuc /// getSpelling - This method is used to get the spelling of a token into a 239f4a2713aSLionel Sambuc /// preallocated buffer, instead of as an std::string. The caller is required 240f4a2713aSLionel Sambuc /// to allocate enough space for the token, which is guaranteed to be at least 241f4a2713aSLionel Sambuc /// Tok.getLength() bytes long. The length of the actual result is returned. 242f4a2713aSLionel Sambuc /// 243f4a2713aSLionel Sambuc /// Note that this method may do two possible things: it may either fill in 244f4a2713aSLionel Sambuc /// the buffer specified with characters, or it may *change the input pointer* 245f4a2713aSLionel Sambuc /// to point to a constant buffer with the data already in it (avoiding a 246f4a2713aSLionel Sambuc /// copy). The caller is not allowed to modify the returned buffer pointer 247f4a2713aSLionel Sambuc /// if an internal buffer is returned. 
248f4a2713aSLionel Sambuc static unsigned getSpelling(const Token &Tok, const char *&Buffer, 249f4a2713aSLionel Sambuc const SourceManager &SourceMgr, 250f4a2713aSLionel Sambuc const LangOptions &LangOpts, 251*0a6a1f1dSLionel Sambuc bool *Invalid = nullptr); 252f4a2713aSLionel Sambuc 253f4a2713aSLionel Sambuc /// getSpelling() - Return the 'spelling' of the Tok token. The spelling of a 254f4a2713aSLionel Sambuc /// token is the characters used to represent the token in the source file 255f4a2713aSLionel Sambuc /// after trigraph expansion and escaped-newline folding. In particular, this 256f4a2713aSLionel Sambuc /// wants to get the true, uncanonicalized, spelling of things like digraphs 257f4a2713aSLionel Sambuc /// UCNs, etc. 258f4a2713aSLionel Sambuc static std::string getSpelling(const Token &Tok, 259f4a2713aSLionel Sambuc const SourceManager &SourceMgr, 260f4a2713aSLionel Sambuc const LangOptions &LangOpts, 261*0a6a1f1dSLionel Sambuc bool *Invalid = nullptr); 262f4a2713aSLionel Sambuc 263f4a2713aSLionel Sambuc /// getSpelling - This method is used to get the spelling of the 264f4a2713aSLionel Sambuc /// token at the given source location. If, as is usually true, it 265f4a2713aSLionel Sambuc /// is not necessary to copy any data, then the returned string may 266f4a2713aSLionel Sambuc /// not point into the provided buffer. 267f4a2713aSLionel Sambuc /// 268f4a2713aSLionel Sambuc /// This method lexes at the expansion depth of the given 269f4a2713aSLionel Sambuc /// location and does not jump to the expansion or spelling 270f4a2713aSLionel Sambuc /// location. 
271f4a2713aSLionel Sambuc static StringRef getSpelling(SourceLocation loc, 272f4a2713aSLionel Sambuc SmallVectorImpl<char> &buffer, 273f4a2713aSLionel Sambuc const SourceManager &SourceMgr, 274f4a2713aSLionel Sambuc const LangOptions &LangOpts, 275*0a6a1f1dSLionel Sambuc bool *invalid = nullptr); 276f4a2713aSLionel Sambuc 277f4a2713aSLionel Sambuc /// MeasureTokenLength - Relex the token at the specified location and return 278f4a2713aSLionel Sambuc /// its length in bytes in the input file. If the token needs cleaning (e.g. 279f4a2713aSLionel Sambuc /// includes a trigraph or an escaped newline) then this count includes bytes 280f4a2713aSLionel Sambuc /// that are part of that. 281f4a2713aSLionel Sambuc static unsigned MeasureTokenLength(SourceLocation Loc, 282f4a2713aSLionel Sambuc const SourceManager &SM, 283f4a2713aSLionel Sambuc const LangOptions &LangOpts); 284f4a2713aSLionel Sambuc 285f4a2713aSLionel Sambuc /// \brief Relex the token at the specified location. 286f4a2713aSLionel Sambuc /// \returns true if there was a failure, false on success. 287f4a2713aSLionel Sambuc static bool getRawToken(SourceLocation Loc, Token &Result, 288f4a2713aSLionel Sambuc const SourceManager &SM, 289f4a2713aSLionel Sambuc const LangOptions &LangOpts, 290f4a2713aSLionel Sambuc bool IgnoreWhiteSpace = false); 291f4a2713aSLionel Sambuc 292f4a2713aSLionel Sambuc /// \brief Given a location any where in a source buffer, find the location 293f4a2713aSLionel Sambuc /// that corresponds to the beginning of the token in which the original 294f4a2713aSLionel Sambuc /// source location lands. 
295f4a2713aSLionel Sambuc static SourceLocation GetBeginningOfToken(SourceLocation Loc, 296f4a2713aSLionel Sambuc const SourceManager &SM, 297f4a2713aSLionel Sambuc const LangOptions &LangOpts); 298f4a2713aSLionel Sambuc 299f4a2713aSLionel Sambuc /// AdvanceToTokenCharacter - If the current SourceLocation specifies a 300f4a2713aSLionel Sambuc /// location at the start of a token, return a new location that specifies a 301f4a2713aSLionel Sambuc /// character within the token. This handles trigraphs and escaped newlines. 302f4a2713aSLionel Sambuc static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 303f4a2713aSLionel Sambuc unsigned Character, 304f4a2713aSLionel Sambuc const SourceManager &SM, 305f4a2713aSLionel Sambuc const LangOptions &LangOpts); 306f4a2713aSLionel Sambuc 307f4a2713aSLionel Sambuc /// \brief Computes the source location just past the end of the 308f4a2713aSLionel Sambuc /// token at this source location. 309f4a2713aSLionel Sambuc /// 310f4a2713aSLionel Sambuc /// This routine can be used to produce a source location that 311f4a2713aSLionel Sambuc /// points just past the end of the token referenced by \p Loc, and 312f4a2713aSLionel Sambuc /// is generally used when a diagnostic needs to point just after a 313f4a2713aSLionel Sambuc /// token where it expected something different that it received. If 314f4a2713aSLionel Sambuc /// the returned source location would not be meaningful (e.g., if 315f4a2713aSLionel Sambuc /// it points into a macro), this routine returns an invalid 316f4a2713aSLionel Sambuc /// source location. 317f4a2713aSLionel Sambuc /// 318f4a2713aSLionel Sambuc /// \param Offset an offset from the end of the token, where the source 319f4a2713aSLionel Sambuc /// location should refer to. 
The default offset (0) produces a source 320f4a2713aSLionel Sambuc /// location pointing just past the end of the token; an offset of 1 produces 321f4a2713aSLionel Sambuc /// a source location pointing to the last character in the token, etc. 322f4a2713aSLionel Sambuc static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, 323f4a2713aSLionel Sambuc const SourceManager &SM, 324f4a2713aSLionel Sambuc const LangOptions &LangOpts); 325f4a2713aSLionel Sambuc 326f4a2713aSLionel Sambuc /// \brief Returns true if the given MacroID location points at the first 327f4a2713aSLionel Sambuc /// token of the macro expansion. 328f4a2713aSLionel Sambuc /// 329f4a2713aSLionel Sambuc /// \param MacroBegin If non-null and function returns true, it is set to 330f4a2713aSLionel Sambuc /// begin location of the macro. 331f4a2713aSLionel Sambuc static bool isAtStartOfMacroExpansion(SourceLocation loc, 332f4a2713aSLionel Sambuc const SourceManager &SM, 333f4a2713aSLionel Sambuc const LangOptions &LangOpts, 334*0a6a1f1dSLionel Sambuc SourceLocation *MacroBegin = nullptr); 335f4a2713aSLionel Sambuc 336f4a2713aSLionel Sambuc /// \brief Returns true if the given MacroID location points at the last 337f4a2713aSLionel Sambuc /// token of the macro expansion. 338f4a2713aSLionel Sambuc /// 339f4a2713aSLionel Sambuc /// \param MacroEnd If non-null and function returns true, it is set to 340f4a2713aSLionel Sambuc /// end location of the macro. 341f4a2713aSLionel Sambuc static bool isAtEndOfMacroExpansion(SourceLocation loc, 342f4a2713aSLionel Sambuc const SourceManager &SM, 343f4a2713aSLionel Sambuc const LangOptions &LangOpts, 344*0a6a1f1dSLionel Sambuc SourceLocation *MacroEnd = nullptr); 345f4a2713aSLionel Sambuc 346f4a2713aSLionel Sambuc /// \brief Accepts a range and returns a character range with file locations. 
///
  /// Returns a null range if a part of the range resides inside a macro
  /// expansion or the range does not reside on the same FileID.
  ///
  /// This function is trying to deal with macros and return a range based on
  /// file locations. The cases where it can successfully handle macros are:
  ///
  /// -begin or end range lies at the start or end of a macro expansion, in
  ///  which case the location will be set to the expansion point, e.g:
  ///    \#define M 1 2
  ///    a M
  ///  If you have a range [a, 2] (where 2 came from the macro), the function
  ///  will return a range for "a M"
  ///  if you have range [a, 1], the function will fail because the range
  ///  overlaps with only a part of the macro
  ///
  /// -The macro is a function macro and the range can be mapped to the macro
  ///  arguments, e.g:
  ///    \#define M 1 2
  ///    \#define FM(x) x
  ///    FM(a b M)
  ///  if you have range [b, 2], the function will return the file range "b M"
  ///  inside the macro arguments.
  ///  if you have range [a, 2], the function will return the file range
  ///  "FM(a b M)" since the range includes all of the macro expansion.
372f4a2713aSLionel Sambuc static CharSourceRange makeFileCharRange(CharSourceRange Range, 373f4a2713aSLionel Sambuc const SourceManager &SM, 374f4a2713aSLionel Sambuc const LangOptions &LangOpts); 375f4a2713aSLionel Sambuc 376f4a2713aSLionel Sambuc /// \brief Returns a string for the source that the range encompasses. 377f4a2713aSLionel Sambuc static StringRef getSourceText(CharSourceRange Range, 378f4a2713aSLionel Sambuc const SourceManager &SM, 379f4a2713aSLionel Sambuc const LangOptions &LangOpts, 380*0a6a1f1dSLionel Sambuc bool *Invalid = nullptr); 381f4a2713aSLionel Sambuc 382f4a2713aSLionel Sambuc /// \brief Retrieve the name of the immediate macro expansion. 383f4a2713aSLionel Sambuc /// 384f4a2713aSLionel Sambuc /// This routine starts from a source location, and finds the name of the macro 385f4a2713aSLionel Sambuc /// responsible for its immediate expansion. It looks through any intervening 386f4a2713aSLionel Sambuc /// macro argument expansions to compute this. It returns a StringRef which 387f4a2713aSLionel Sambuc /// refers to the SourceManager-owned buffer of the source where that macro 388f4a2713aSLionel Sambuc /// name is spelled. Thus, the result shouldn't out-live that SourceManager. 389f4a2713aSLionel Sambuc static StringRef getImmediateMacroName(SourceLocation Loc, 390f4a2713aSLionel Sambuc const SourceManager &SM, 391f4a2713aSLionel Sambuc const LangOptions &LangOpts); 392f4a2713aSLionel Sambuc 393f4a2713aSLionel Sambuc /// \brief Compute the preamble of the given file. 394f4a2713aSLionel Sambuc /// 395f4a2713aSLionel Sambuc /// The preamble of a file contains the initial comments, include directives, 396f4a2713aSLionel Sambuc /// and other preprocessor directives that occur before the code in this 397f4a2713aSLionel Sambuc /// particular file actually begins. The preamble of the main source file is 398f4a2713aSLionel Sambuc /// a potential prefix header. 
399f4a2713aSLionel Sambuc /// 400f4a2713aSLionel Sambuc /// \param Buffer The memory buffer containing the file's contents. 401f4a2713aSLionel Sambuc /// 402f4a2713aSLionel Sambuc /// \param MaxLines If non-zero, restrict the length of the preamble 403f4a2713aSLionel Sambuc /// to fewer than this number of lines. 404f4a2713aSLionel Sambuc /// 405f4a2713aSLionel Sambuc /// \returns The offset into the file where the preamble ends and the rest 406f4a2713aSLionel Sambuc /// of the file begins along with a boolean value indicating whether 407f4a2713aSLionel Sambuc /// the preamble ends at the beginning of a new line. 408*0a6a1f1dSLionel Sambuc static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer, 409*0a6a1f1dSLionel Sambuc const LangOptions &LangOpts, 410f4a2713aSLionel Sambuc unsigned MaxLines = 0); 411f4a2713aSLionel Sambuc 412f4a2713aSLionel Sambuc /// \brief Checks that the given token is the first token that occurs after 413f4a2713aSLionel Sambuc /// the given location (this excludes comments and whitespace). Returns the 414f4a2713aSLionel Sambuc /// location immediately after the specified token. If the token is not found 415f4a2713aSLionel Sambuc /// or the location is inside a macro, the returned source location will be 416f4a2713aSLionel Sambuc /// invalid. 417f4a2713aSLionel Sambuc static SourceLocation findLocationAfterToken(SourceLocation loc, 418f4a2713aSLionel Sambuc tok::TokenKind TKind, 419f4a2713aSLionel Sambuc const SourceManager &SM, 420f4a2713aSLionel Sambuc const LangOptions &LangOpts, 421f4a2713aSLionel Sambuc bool SkipTrailingWhitespaceAndNewLine); 422f4a2713aSLionel Sambuc 423f4a2713aSLionel Sambuc /// \brief Returns true if the given character could appear in an identifier. 
424f4a2713aSLionel Sambuc static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts); 425f4a2713aSLionel Sambuc 426f4a2713aSLionel Sambuc /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever 427f4a2713aSLionel Sambuc /// emit a warning. getCharAndSizeNoWarn(const char * Ptr,unsigned & Size,const LangOptions & LangOpts)428f4a2713aSLionel Sambuc static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, 429f4a2713aSLionel Sambuc const LangOptions &LangOpts) { 430f4a2713aSLionel Sambuc // If this is not a trigraph and not a UCN or escaped newline, return 431f4a2713aSLionel Sambuc // quickly. 432f4a2713aSLionel Sambuc if (isObviouslySimpleCharacter(Ptr[0])) { 433f4a2713aSLionel Sambuc Size = 1; 434f4a2713aSLionel Sambuc return *Ptr; 435f4a2713aSLionel Sambuc } 436f4a2713aSLionel Sambuc 437f4a2713aSLionel Sambuc Size = 0; 438f4a2713aSLionel Sambuc return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); 439f4a2713aSLionel Sambuc } 440f4a2713aSLionel Sambuc 441f4a2713aSLionel Sambuc //===--------------------------------------------------------------------===// 442f4a2713aSLionel Sambuc // Internal implementation interfaces. 443f4a2713aSLionel Sambuc private: 444f4a2713aSLionel Sambuc 445f4a2713aSLionel Sambuc /// LexTokenInternal - Internal interface to lex a preprocessing token. Called 446f4a2713aSLionel Sambuc /// by Lex. 447f4a2713aSLionel Sambuc /// 448f4a2713aSLionel Sambuc bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine); 449f4a2713aSLionel Sambuc 450f4a2713aSLionel Sambuc bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr); 451f4a2713aSLionel Sambuc 452f4a2713aSLionel Sambuc /// Given that a token begins with the Unicode character \p C, figure out 453f4a2713aSLionel Sambuc /// what kind of token it is and dispatch to the appropriate lexing helper 454f4a2713aSLionel Sambuc /// function. 
455f4a2713aSLionel Sambuc bool LexUnicode(Token &Result, uint32_t C, const char *CurPtr); 456f4a2713aSLionel Sambuc 457f4a2713aSLionel Sambuc /// FormTokenWithChars - When we lex a token, we have identified a span 458f4a2713aSLionel Sambuc /// starting at BufferPtr, going to TokEnd that forms the token. This method 459f4a2713aSLionel Sambuc /// takes that range and assigns it to the token as its location and size. In 460f4a2713aSLionel Sambuc /// addition, since tokens cannot overlap, this also updates BufferPtr to be 461f4a2713aSLionel Sambuc /// TokEnd. FormTokenWithChars(Token & Result,const char * TokEnd,tok::TokenKind Kind)462f4a2713aSLionel Sambuc void FormTokenWithChars(Token &Result, const char *TokEnd, 463f4a2713aSLionel Sambuc tok::TokenKind Kind) { 464f4a2713aSLionel Sambuc unsigned TokLen = TokEnd-BufferPtr; 465f4a2713aSLionel Sambuc Result.setLength(TokLen); 466f4a2713aSLionel Sambuc Result.setLocation(getSourceLocation(BufferPtr, TokLen)); 467f4a2713aSLionel Sambuc Result.setKind(Kind); 468f4a2713aSLionel Sambuc BufferPtr = TokEnd; 469f4a2713aSLionel Sambuc } 470f4a2713aSLionel Sambuc 471f4a2713aSLionel Sambuc /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a 472f4a2713aSLionel Sambuc /// tok::l_paren token, 0 if it is something else and 2 if there are no more 473f4a2713aSLionel Sambuc /// tokens in the buffer controlled by this lexer. 474f4a2713aSLionel Sambuc unsigned isNextPPTokenLParen(); 475f4a2713aSLionel Sambuc 476f4a2713aSLionel Sambuc //===--------------------------------------------------------------------===// 477f4a2713aSLionel Sambuc // Lexer character reading interfaces. 478f4a2713aSLionel Sambuc 479f4a2713aSLionel Sambuc // This lexer is built on two interfaces for reading characters, both of which 480f4a2713aSLionel Sambuc // automatically provide phase 1/2 translation. 
// getAndAdvanceChar is used
// when we know that we will be reading a character from the input buffer and
// that this character will be part of the result token. This occurs in (for
// example) string processing, because we know we need to read until we find
// the closing '"' character.
//
// The second interface is the combination of getCharAndSize with
// ConsumeChar. getCharAndSize reads a phase 1/2 translated character,
// returning it and its size. If the lexer decides that this character is
// part of the current token, it calls ConsumeChar on it. This two stage
// approach allows us to emit diagnostics for characters (e.g. warnings about
// trigraphs), knowing that they are only emitted if the character is
// consumed.

/// isObviouslySimpleCharacter - Return true if the specified character is
/// obviously the same in translation phase 1 and translation phase 3. A
/// false result is conservative: the character may still turn out to map to
/// itself. A true result is never given for something that needs mapping.
static bool isObviouslySimpleCharacter(char C) {
  // Only '?' and '\' are treated as potentially needing translation.
  return !(C == '?' || C == '\\');
}

/// getAndAdvanceChar - Read a single 'character' from the specified buffer,
/// advance over it, and return it. This is tricky in several cases.
Here we 504f4a2713aSLionel Sambuc /// just handle the trivial case and fall-back to the non-inlined 505f4a2713aSLionel Sambuc /// getCharAndSizeSlow method to handle the hard case. getAndAdvanceChar(const char * & Ptr,Token & Tok)506f4a2713aSLionel Sambuc inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) { 507f4a2713aSLionel Sambuc // If this is not a trigraph and not a UCN or escaped newline, return 508f4a2713aSLionel Sambuc // quickly. 509f4a2713aSLionel Sambuc if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++; 510f4a2713aSLionel Sambuc 511f4a2713aSLionel Sambuc unsigned Size = 0; 512f4a2713aSLionel Sambuc char C = getCharAndSizeSlow(Ptr, Size, &Tok); 513f4a2713aSLionel Sambuc Ptr += Size; 514f4a2713aSLionel Sambuc return C; 515f4a2713aSLionel Sambuc } 516f4a2713aSLionel Sambuc 517f4a2713aSLionel Sambuc /// ConsumeChar - When a character (identified by getCharAndSize) is consumed 518f4a2713aSLionel Sambuc /// and added to a given token, check to see if there are diagnostics that 519f4a2713aSLionel Sambuc /// need to be emitted or flags that need to be set on the token. If so, do 520f4a2713aSLionel Sambuc /// it. ConsumeChar(const char * Ptr,unsigned Size,Token & Tok)521f4a2713aSLionel Sambuc const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) { 522f4a2713aSLionel Sambuc // Normal case, we consumed exactly one token. Just return it. 523f4a2713aSLionel Sambuc if (Size == 1) 524f4a2713aSLionel Sambuc return Ptr+Size; 525f4a2713aSLionel Sambuc 526f4a2713aSLionel Sambuc // Otherwise, re-lex the character with a current token, allowing 527f4a2713aSLionel Sambuc // diagnostics to be emitted and flags to be set. 
528f4a2713aSLionel Sambuc Size = 0; 529f4a2713aSLionel Sambuc getCharAndSizeSlow(Ptr, Size, &Tok); 530f4a2713aSLionel Sambuc return Ptr+Size; 531f4a2713aSLionel Sambuc } 532f4a2713aSLionel Sambuc 533f4a2713aSLionel Sambuc /// getCharAndSize - Peek a single 'character' from the specified buffer, 534f4a2713aSLionel Sambuc /// get its size, and return it. This is tricky in several cases. Here we 535f4a2713aSLionel Sambuc /// just handle the trivial case and fall-back to the non-inlined 536f4a2713aSLionel Sambuc /// getCharAndSizeSlow method to handle the hard case. getCharAndSize(const char * Ptr,unsigned & Size)537f4a2713aSLionel Sambuc inline char getCharAndSize(const char *Ptr, unsigned &Size) { 538f4a2713aSLionel Sambuc // If this is not a trigraph and not a UCN or escaped newline, return 539f4a2713aSLionel Sambuc // quickly. 540f4a2713aSLionel Sambuc if (isObviouslySimpleCharacter(Ptr[0])) { 541f4a2713aSLionel Sambuc Size = 1; 542f4a2713aSLionel Sambuc return *Ptr; 543f4a2713aSLionel Sambuc } 544f4a2713aSLionel Sambuc 545f4a2713aSLionel Sambuc Size = 0; 546f4a2713aSLionel Sambuc return getCharAndSizeSlow(Ptr, Size); 547f4a2713aSLionel Sambuc } 548f4a2713aSLionel Sambuc 549f4a2713aSLionel Sambuc /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize 550f4a2713aSLionel Sambuc /// method. 551*0a6a1f1dSLionel Sambuc char getCharAndSizeSlow(const char *Ptr, unsigned &Size, 552*0a6a1f1dSLionel Sambuc Token *Tok = nullptr); 553f4a2713aSLionel Sambuc 554f4a2713aSLionel Sambuc /// getEscapedNewLineSize - Return the size of the specified escaped newline, 555f4a2713aSLionel Sambuc /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry 556f4a2713aSLionel Sambuc /// to this function. 
557f4a2713aSLionel Sambuc static unsigned getEscapedNewLineSize(const char *P); 558f4a2713aSLionel Sambuc 559f4a2713aSLionel Sambuc /// SkipEscapedNewLines - If P points to an escaped newline (or a series of 560f4a2713aSLionel Sambuc /// them), skip over them and return the first non-escaped-newline found, 561f4a2713aSLionel Sambuc /// otherwise return P. 562f4a2713aSLionel Sambuc static const char *SkipEscapedNewLines(const char *P); 563f4a2713aSLionel Sambuc 564f4a2713aSLionel Sambuc /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a 565f4a2713aSLionel Sambuc /// diagnostic. 566f4a2713aSLionel Sambuc static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, 567f4a2713aSLionel Sambuc const LangOptions &LangOpts); 568f4a2713aSLionel Sambuc 569f4a2713aSLionel Sambuc //===--------------------------------------------------------------------===// 570f4a2713aSLionel Sambuc // Other lexer functions. 571f4a2713aSLionel Sambuc 572f4a2713aSLionel Sambuc void SkipBytes(unsigned Bytes, bool StartOfLine); 573f4a2713aSLionel Sambuc 574f4a2713aSLionel Sambuc void PropagateLineStartLeadingSpaceInfo(Token &Result); 575f4a2713aSLionel Sambuc 576f4a2713aSLionel Sambuc const char *LexUDSuffix(Token &Result, const char *CurPtr, 577f4a2713aSLionel Sambuc bool IsStringLiteral); 578f4a2713aSLionel Sambuc 579f4a2713aSLionel Sambuc // Helper functions to lex the remainder of a token of the specific type. 
580f4a2713aSLionel Sambuc bool LexIdentifier (Token &Result, const char *CurPtr); 581f4a2713aSLionel Sambuc bool LexNumericConstant (Token &Result, const char *CurPtr); 582f4a2713aSLionel Sambuc bool LexStringLiteral (Token &Result, const char *CurPtr, 583f4a2713aSLionel Sambuc tok::TokenKind Kind); 584f4a2713aSLionel Sambuc bool LexRawStringLiteral (Token &Result, const char *CurPtr, 585f4a2713aSLionel Sambuc tok::TokenKind Kind); 586f4a2713aSLionel Sambuc bool LexAngledStringLiteral(Token &Result, const char *CurPtr); 587f4a2713aSLionel Sambuc bool LexCharConstant (Token &Result, const char *CurPtr, 588f4a2713aSLionel Sambuc tok::TokenKind Kind); 589f4a2713aSLionel Sambuc bool LexEndOfFile (Token &Result, const char *CurPtr); 590f4a2713aSLionel Sambuc bool SkipWhitespace (Token &Result, const char *CurPtr, 591f4a2713aSLionel Sambuc bool &TokAtPhysicalStartOfLine); 592f4a2713aSLionel Sambuc bool SkipLineComment (Token &Result, const char *CurPtr, 593f4a2713aSLionel Sambuc bool &TokAtPhysicalStartOfLine); 594f4a2713aSLionel Sambuc bool SkipBlockComment (Token &Result, const char *CurPtr, 595f4a2713aSLionel Sambuc bool &TokAtPhysicalStartOfLine); 596f4a2713aSLionel Sambuc bool SaveLineComment (Token &Result, const char *CurPtr); 597f4a2713aSLionel Sambuc 598f4a2713aSLionel Sambuc bool IsStartOfConflictMarker(const char *CurPtr); 599f4a2713aSLionel Sambuc bool HandleEndOfConflictMarker(const char *CurPtr); 600f4a2713aSLionel Sambuc 601f4a2713aSLionel Sambuc bool isCodeCompletionPoint(const char *CurPtr) const; cutOffLexing()602f4a2713aSLionel Sambuc void cutOffLexing() { BufferPtr = BufferEnd; } 603f4a2713aSLionel Sambuc 604f4a2713aSLionel Sambuc bool isHexaLiteral(const char *Start, const LangOptions &LangOpts); 605f4a2713aSLionel Sambuc 606f4a2713aSLionel Sambuc 607f4a2713aSLionel Sambuc /// Read a universal character name. 608f4a2713aSLionel Sambuc /// 609f4a2713aSLionel Sambuc /// \param CurPtr The position in the source buffer after the initial '\'. 
610f4a2713aSLionel Sambuc /// If the UCN is syntactically well-formed (but not necessarily 611f4a2713aSLionel Sambuc /// valid), this parameter will be updated to point to the 612f4a2713aSLionel Sambuc /// character after the UCN. 613f4a2713aSLionel Sambuc /// \param SlashLoc The position in the source buffer of the '\'. 614f4a2713aSLionel Sambuc /// \param Tok The token being formed. Pass \c NULL to suppress diagnostics 615f4a2713aSLionel Sambuc /// and handle token formation in the caller. 616f4a2713aSLionel Sambuc /// 617f4a2713aSLionel Sambuc /// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is 618f4a2713aSLionel Sambuc /// invalid. 619f4a2713aSLionel Sambuc uint32_t tryReadUCN(const char *&CurPtr, const char *SlashLoc, Token *Tok); 620f4a2713aSLionel Sambuc 621*0a6a1f1dSLionel Sambuc /// \brief Try to consume a UCN as part of an identifier at the current 622*0a6a1f1dSLionel Sambuc /// location. 623*0a6a1f1dSLionel Sambuc /// \param CurPtr Initially points to the range of characters in the source 624*0a6a1f1dSLionel Sambuc /// buffer containing the '\'. Updated to point past the end of 625*0a6a1f1dSLionel Sambuc /// the UCN on success. 626*0a6a1f1dSLionel Sambuc /// \param Size The number of characters occupied by the '\' (including 627*0a6a1f1dSLionel Sambuc /// trigraphs and escaped newlines). 628*0a6a1f1dSLionel Sambuc /// \param Result The token being produced. Marked as containing a UCN on 629*0a6a1f1dSLionel Sambuc /// success. 630*0a6a1f1dSLionel Sambuc /// \return \c true if a UCN was lexed and it produced an acceptable 631*0a6a1f1dSLionel Sambuc /// identifier character, \c false otherwise. 632*0a6a1f1dSLionel Sambuc bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, 633*0a6a1f1dSLionel Sambuc Token &Result); 634*0a6a1f1dSLionel Sambuc 635*0a6a1f1dSLionel Sambuc /// \brief Try to consume an identifier character encoded in UTF-8. 
636*0a6a1f1dSLionel Sambuc /// \param CurPtr Points to the start of the (potential) UTF-8 code unit 637*0a6a1f1dSLionel Sambuc /// sequence. On success, updated to point past the end of it. 638*0a6a1f1dSLionel Sambuc /// \return \c true if a UTF-8 sequence mapping to an acceptable identifier 639*0a6a1f1dSLionel Sambuc /// character was lexed, \c false otherwise. 640*0a6a1f1dSLionel Sambuc bool tryConsumeIdentifierUTF8Char(const char *&CurPtr); 641*0a6a1f1dSLionel Sambuc }; 642f4a2713aSLionel Sambuc 643f4a2713aSLionel Sambuc } // end namespace clang 644f4a2713aSLionel Sambuc 645f4a2713aSLionel Sambuc #endif 646