1f4a2713aSLionel Sambuc //===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc //                     The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc //  This file defines the Lexer interface.
11f4a2713aSLionel Sambuc //
12f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
13f4a2713aSLionel Sambuc 
14*0a6a1f1dSLionel Sambuc #ifndef LLVM_CLANG_LEX_LEXER_H
15*0a6a1f1dSLionel Sambuc #define LLVM_CLANG_LEX_LEXER_H
16f4a2713aSLionel Sambuc 
17f4a2713aSLionel Sambuc #include "clang/Basic/LangOptions.h"
18f4a2713aSLionel Sambuc #include "clang/Lex/PreprocessorLexer.h"
19f4a2713aSLionel Sambuc #include "llvm/ADT/SmallVector.h"
20f4a2713aSLionel Sambuc #include <cassert>
21f4a2713aSLionel Sambuc #include <string>
22f4a2713aSLionel Sambuc 
23f4a2713aSLionel Sambuc namespace clang {
24f4a2713aSLionel Sambuc class DiagnosticsEngine;
25f4a2713aSLionel Sambuc class SourceManager;
26f4a2713aSLionel Sambuc class Preprocessor;
27f4a2713aSLionel Sambuc class DiagnosticBuilder;
28f4a2713aSLionel Sambuc 
/// ConflictMarkerKind - Identifies which kind of conflict marker, if any, the
/// lexer is currently recovering from.
enum ConflictMarkerKind {
  /// The lexer is not inside a conflict marker.
  CMK_None,

  /// Normal or diff3 marker: opened by at least 7 "<"s, split by at least
  /// 7 "="s or "|"s, and closed by at least 7 ">"s.
  CMK_Normal,

  /// Perforce-style marker: opened by 4 ">"s, split by 4 "="s, and closed
  /// by 4 "<"s.
  CMK_Perforce
};
41f4a2713aSLionel Sambuc 
42f4a2713aSLionel Sambuc /// Lexer - This provides a simple interface that turns a text buffer into a
43f4a2713aSLionel Sambuc /// stream of tokens.  This provides no support for file reading or buffering,
44f4a2713aSLionel Sambuc /// or buffering/seeking of tokens, only forward lexing is supported.  It relies
45f4a2713aSLionel Sambuc /// on the specified Preprocessor object to handle preprocessor directives, etc.
46f4a2713aSLionel Sambuc class Lexer : public PreprocessorLexer {
47*0a6a1f1dSLionel Sambuc   void anchor() override;
48f4a2713aSLionel Sambuc 
49f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
50f4a2713aSLionel Sambuc   // Constant configuration values for this lexer.
51f4a2713aSLionel Sambuc   const char *BufferStart;       // Start of the buffer.
52f4a2713aSLionel Sambuc   const char *BufferEnd;         // End of the buffer.
53f4a2713aSLionel Sambuc   SourceLocation FileLoc;        // Location for start of file.
54f4a2713aSLionel Sambuc   LangOptions LangOpts;          // LangOpts enabled by this language (cache).
55f4a2713aSLionel Sambuc   bool Is_PragmaLexer;           // True if lexer for _Pragma handling.
56f4a2713aSLionel Sambuc 
57f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
58f4a2713aSLionel Sambuc   // Context-specific lexing flags set by the preprocessor.
59f4a2713aSLionel Sambuc   //
60f4a2713aSLionel Sambuc 
61f4a2713aSLionel Sambuc   /// ExtendedTokenMode - The lexer can optionally keep comments and whitespace
62f4a2713aSLionel Sambuc   /// and return them as tokens.  This is used for -C and -CC modes, and
63f4a2713aSLionel Sambuc   /// whitespace preservation can be useful for some clients that want to lex
64f4a2713aSLionel Sambuc   /// the file in raw mode and get every character from the file.
65f4a2713aSLionel Sambuc   ///
66f4a2713aSLionel Sambuc   /// When this is set to 2 it returns comments and whitespace.  When set to 1
67f4a2713aSLionel Sambuc   /// it returns comments, when it is set to 0 it returns normal tokens only.
68f4a2713aSLionel Sambuc   unsigned char ExtendedTokenMode;
69f4a2713aSLionel Sambuc 
70f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
71f4a2713aSLionel Sambuc   // Context that changes as the file is lexed.
72f4a2713aSLionel Sambuc   // NOTE: any state that mutates when in raw mode must have save/restore code
73f4a2713aSLionel Sambuc   // in Lexer::isNextPPTokenLParen.
74f4a2713aSLionel Sambuc 
75f4a2713aSLionel Sambuc   // BufferPtr - Current pointer into the buffer.  This is the next character
76f4a2713aSLionel Sambuc   // to be lexed.
77f4a2713aSLionel Sambuc   const char *BufferPtr;
78f4a2713aSLionel Sambuc 
79f4a2713aSLionel Sambuc   // IsAtStartOfLine - True if the next lexed token should get the "start of
80f4a2713aSLionel Sambuc   // line" flag set on it.
81f4a2713aSLionel Sambuc   bool IsAtStartOfLine;
82f4a2713aSLionel Sambuc 
83f4a2713aSLionel Sambuc   bool IsAtPhysicalStartOfLine;
84f4a2713aSLionel Sambuc 
85f4a2713aSLionel Sambuc   bool HasLeadingSpace;
86f4a2713aSLionel Sambuc 
87f4a2713aSLionel Sambuc   bool HasLeadingEmptyMacro;
88f4a2713aSLionel Sambuc 
89f4a2713aSLionel Sambuc   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
90f4a2713aSLionel Sambuc   ConflictMarkerKind CurrentConflictMarkerState;
91f4a2713aSLionel Sambuc 
92f4a2713aSLionel Sambuc   Lexer(const Lexer &) LLVM_DELETED_FUNCTION;
93f4a2713aSLionel Sambuc   void operator=(const Lexer &) LLVM_DELETED_FUNCTION;
94f4a2713aSLionel Sambuc   friend class Preprocessor;
95f4a2713aSLionel Sambuc 
96f4a2713aSLionel Sambuc   void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
97f4a2713aSLionel Sambuc public:
98f4a2713aSLionel Sambuc 
99f4a2713aSLionel Sambuc   /// Lexer constructor - Create a new lexer object for the specified buffer
100f4a2713aSLionel Sambuc   /// with the specified preprocessor managing the lexing process.  This lexer
101f4a2713aSLionel Sambuc   /// assumes that the associated file buffer and Preprocessor objects will
102f4a2713aSLionel Sambuc   /// outlive it, so it doesn't take ownership of either of them.
103f4a2713aSLionel Sambuc   Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer, Preprocessor &PP);
104f4a2713aSLionel Sambuc 
105f4a2713aSLionel Sambuc   /// Lexer constructor - Create a new raw lexer object.  This object is only
106f4a2713aSLionel Sambuc   /// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the
107f4a2713aSLionel Sambuc   /// text range will outlive it, so it doesn't take ownership of it.
108f4a2713aSLionel Sambuc   Lexer(SourceLocation FileLoc, const LangOptions &LangOpts,
109f4a2713aSLionel Sambuc         const char *BufStart, const char *BufPtr, const char *BufEnd);
110f4a2713aSLionel Sambuc 
111f4a2713aSLionel Sambuc   /// Lexer constructor - Create a new raw lexer object.  This object is only
112f4a2713aSLionel Sambuc   /// suitable for calls to 'LexFromRawLexer'.  This lexer assumes that the
113f4a2713aSLionel Sambuc   /// text range will outlive it, so it doesn't take ownership of it.
114f4a2713aSLionel Sambuc   Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer,
115f4a2713aSLionel Sambuc         const SourceManager &SM, const LangOptions &LangOpts);
116f4a2713aSLionel Sambuc 
117f4a2713aSLionel Sambuc   /// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
118f4a2713aSLionel Sambuc   /// _Pragma expansion.  This has a variety of magic semantics that this method
119f4a2713aSLionel Sambuc   /// sets up.  It returns a new'd Lexer that must be delete'd when done.
120f4a2713aSLionel Sambuc   static Lexer *Create_PragmaLexer(SourceLocation SpellingLoc,
121f4a2713aSLionel Sambuc                                    SourceLocation ExpansionLocStart,
122f4a2713aSLionel Sambuc                                    SourceLocation ExpansionLocEnd,
123f4a2713aSLionel Sambuc                                    unsigned TokLen, Preprocessor &PP);
124f4a2713aSLionel Sambuc 
125f4a2713aSLionel Sambuc 
126f4a2713aSLionel Sambuc   /// getLangOpts - Return the language features currently enabled.
127f4a2713aSLionel Sambuc   /// NOTE: this lexer modifies features as a file is parsed!
getLangOpts()128f4a2713aSLionel Sambuc   const LangOptions &getLangOpts() const { return LangOpts; }
129f4a2713aSLionel Sambuc 
130f4a2713aSLionel Sambuc   /// getFileLoc - Return the File Location for the file we are lexing out of.
131f4a2713aSLionel Sambuc   /// The physical location encodes the location where the characters come from,
132f4a2713aSLionel Sambuc   /// the virtual location encodes where we should *claim* the characters came
133f4a2713aSLionel Sambuc   /// from.  Currently this is only used by _Pragma handling.
getFileLoc()134f4a2713aSLionel Sambuc   SourceLocation getFileLoc() const { return FileLoc; }
135f4a2713aSLionel Sambuc 
136f4a2713aSLionel Sambuc private:
137f4a2713aSLionel Sambuc   /// Lex - Return the next token in the file.  If this is the end of file, it
138f4a2713aSLionel Sambuc   /// returns the tok::eof token.  This implicitly involves the preprocessor.
139f4a2713aSLionel Sambuc   bool Lex(Token &Result);
140f4a2713aSLionel Sambuc 
141f4a2713aSLionel Sambuc public:
142f4a2713aSLionel Sambuc   /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
isPragmaLexer()143f4a2713aSLionel Sambuc   bool isPragmaLexer() const { return Is_PragmaLexer; }
144f4a2713aSLionel Sambuc 
145f4a2713aSLionel Sambuc private:
146f4a2713aSLionel Sambuc   /// IndirectLex - An indirect call to 'Lex' that can be invoked via
147f4a2713aSLionel Sambuc   ///  the PreprocessorLexer interface.
IndirectLex(Token & Result)148*0a6a1f1dSLionel Sambuc   void IndirectLex(Token &Result) override { Lex(Result); }
149f4a2713aSLionel Sambuc 
150f4a2713aSLionel Sambuc public:
151f4a2713aSLionel Sambuc   /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no
152f4a2713aSLionel Sambuc   /// associated preprocessor object).  Return true if the 'next character to
153f4a2713aSLionel Sambuc   /// read' pointer points at the end of the lexer buffer, false otherwise.
LexFromRawLexer(Token & Result)154f4a2713aSLionel Sambuc   bool LexFromRawLexer(Token &Result) {
155f4a2713aSLionel Sambuc     assert(LexingRawMode && "Not already in raw mode!");
156f4a2713aSLionel Sambuc     Lex(Result);
157f4a2713aSLionel Sambuc     // Note that lexing to the end of the buffer doesn't implicitly delete the
158f4a2713aSLionel Sambuc     // lexer when in raw mode.
159f4a2713aSLionel Sambuc     return BufferPtr == BufferEnd;
160f4a2713aSLionel Sambuc   }
161f4a2713aSLionel Sambuc 
162f4a2713aSLionel Sambuc   /// isKeepWhitespaceMode - Return true if the lexer should return tokens for
163f4a2713aSLionel Sambuc   /// every character in the file, including whitespace and comments.  This
164f4a2713aSLionel Sambuc   /// should only be used in raw mode, as the preprocessor is not prepared to
165f4a2713aSLionel Sambuc   /// deal with the excess tokens.
isKeepWhitespaceMode()166f4a2713aSLionel Sambuc   bool isKeepWhitespaceMode() const {
167f4a2713aSLionel Sambuc     return ExtendedTokenMode > 1;
168f4a2713aSLionel Sambuc   }
169f4a2713aSLionel Sambuc 
170f4a2713aSLionel Sambuc   /// SetKeepWhitespaceMode - This method lets clients enable or disable
171f4a2713aSLionel Sambuc   /// whitespace retention mode.
SetKeepWhitespaceMode(bool Val)172f4a2713aSLionel Sambuc   void SetKeepWhitespaceMode(bool Val) {
173f4a2713aSLionel Sambuc     assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
174f4a2713aSLionel Sambuc            "Can only retain whitespace in raw mode or -traditional-cpp");
175f4a2713aSLionel Sambuc     ExtendedTokenMode = Val ? 2 : 0;
176f4a2713aSLionel Sambuc   }
177f4a2713aSLionel Sambuc 
178f4a2713aSLionel Sambuc   /// inKeepCommentMode - Return true if the lexer should return comments as
179f4a2713aSLionel Sambuc   /// tokens.
inKeepCommentMode()180f4a2713aSLionel Sambuc   bool inKeepCommentMode() const {
181f4a2713aSLionel Sambuc     return ExtendedTokenMode > 0;
182f4a2713aSLionel Sambuc   }
183f4a2713aSLionel Sambuc 
184f4a2713aSLionel Sambuc   /// SetCommentRetentionState - Change the comment retention mode of the lexer
185f4a2713aSLionel Sambuc   /// to the specified mode.  This is really only useful when lexing in raw
186f4a2713aSLionel Sambuc   /// mode, because otherwise the lexer needs to manage this.
SetCommentRetentionState(bool Mode)187f4a2713aSLionel Sambuc   void SetCommentRetentionState(bool Mode) {
188f4a2713aSLionel Sambuc     assert(!isKeepWhitespaceMode() &&
189f4a2713aSLionel Sambuc            "Can't play with comment retention state when retaining whitespace");
190f4a2713aSLionel Sambuc     ExtendedTokenMode = Mode ? 1 : 0;
191f4a2713aSLionel Sambuc   }
192f4a2713aSLionel Sambuc 
193f4a2713aSLionel Sambuc   /// Sets the extended token mode back to its initial value, according to the
194f4a2713aSLionel Sambuc   /// language options and preprocessor. This controls whether the lexer
195f4a2713aSLionel Sambuc   /// produces comment and whitespace tokens.
196f4a2713aSLionel Sambuc   ///
197f4a2713aSLionel Sambuc   /// This requires the lexer to have an associated preprocessor. A standalone
198f4a2713aSLionel Sambuc   /// lexer has nothing to reset to.
199f4a2713aSLionel Sambuc   void resetExtendedTokenMode();
200f4a2713aSLionel Sambuc 
201f4a2713aSLionel Sambuc   /// Gets source code buffer.
getBuffer()202f4a2713aSLionel Sambuc   StringRef getBuffer() const {
203f4a2713aSLionel Sambuc     return StringRef(BufferStart, BufferEnd - BufferStart);
204f4a2713aSLionel Sambuc   }
205f4a2713aSLionel Sambuc 
206f4a2713aSLionel Sambuc   /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
207f4a2713aSLionel Sambuc   /// uninterpreted string.  This switches the lexer out of directive mode.
208*0a6a1f1dSLionel Sambuc   void ReadToEndOfLine(SmallVectorImpl<char> *Result = nullptr);
209f4a2713aSLionel Sambuc 
210f4a2713aSLionel Sambuc 
211f4a2713aSLionel Sambuc   /// Diag - Forwarding function for diagnostics.  This translates a source
212f4a2713aSLionel Sambuc   /// position in the current buffer into a SourceLocation object for rendering.
213f4a2713aSLionel Sambuc   DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const;
214f4a2713aSLionel Sambuc 
215f4a2713aSLionel Sambuc   /// getSourceLocation - Return a source location identifier for the specified
216f4a2713aSLionel Sambuc   /// offset in the current file.
217f4a2713aSLionel Sambuc   SourceLocation getSourceLocation(const char *Loc, unsigned TokLen = 1) const;
218f4a2713aSLionel Sambuc 
219f4a2713aSLionel Sambuc   /// getSourceLocation - Return a source location for the next character in
220f4a2713aSLionel Sambuc   /// the current file.
getSourceLocation()221*0a6a1f1dSLionel Sambuc   SourceLocation getSourceLocation() override {
222*0a6a1f1dSLionel Sambuc     return getSourceLocation(BufferPtr);
223*0a6a1f1dSLionel Sambuc   }
224f4a2713aSLionel Sambuc 
225f4a2713aSLionel Sambuc   /// \brief Return the current location in the buffer.
getBufferLocation()226f4a2713aSLionel Sambuc   const char *getBufferLocation() const { return BufferPtr; }
227f4a2713aSLionel Sambuc 
228f4a2713aSLionel Sambuc   /// Stringify - Convert the specified string into a C string by escaping '\'
229f4a2713aSLionel Sambuc   /// and " characters.  This does not add surrounding ""'s to the string.
230f4a2713aSLionel Sambuc   /// If Charify is true, this escapes the ' character instead of ".
231f4a2713aSLionel Sambuc   static std::string Stringify(const std::string &Str, bool Charify = false);
232f4a2713aSLionel Sambuc 
233f4a2713aSLionel Sambuc   /// Stringify - Convert the specified string into a C string by escaping '\'
234f4a2713aSLionel Sambuc   /// and " characters.  This does not add surrounding ""'s to the string.
235f4a2713aSLionel Sambuc   static void Stringify(SmallVectorImpl<char> &Str);
236f4a2713aSLionel Sambuc 
237f4a2713aSLionel Sambuc 
238f4a2713aSLionel Sambuc   /// getSpelling - This method is used to get the spelling of a token into a
239f4a2713aSLionel Sambuc   /// preallocated buffer, instead of as an std::string.  The caller is required
240f4a2713aSLionel Sambuc   /// to allocate enough space for the token, which is guaranteed to be at least
241f4a2713aSLionel Sambuc   /// Tok.getLength() bytes long.  The length of the actual result is returned.
242f4a2713aSLionel Sambuc   ///
243f4a2713aSLionel Sambuc   /// Note that this method may do two possible things: it may either fill in
244f4a2713aSLionel Sambuc   /// the buffer specified with characters, or it may *change the input pointer*
245f4a2713aSLionel Sambuc   /// to point to a constant buffer with the data already in it (avoiding a
246f4a2713aSLionel Sambuc   /// copy).  The caller is not allowed to modify the returned buffer pointer
247f4a2713aSLionel Sambuc   /// if an internal buffer is returned.
248f4a2713aSLionel Sambuc   static unsigned getSpelling(const Token &Tok, const char *&Buffer,
249f4a2713aSLionel Sambuc                               const SourceManager &SourceMgr,
250f4a2713aSLionel Sambuc                               const LangOptions &LangOpts,
251*0a6a1f1dSLionel Sambuc                               bool *Invalid = nullptr);
252f4a2713aSLionel Sambuc 
253f4a2713aSLionel Sambuc   /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
254f4a2713aSLionel Sambuc   /// token is the characters used to represent the token in the source file
255f4a2713aSLionel Sambuc   /// after trigraph expansion and escaped-newline folding.  In particular, this
256f4a2713aSLionel Sambuc   /// wants to get the true, uncanonicalized, spelling of things like digraphs,
257f4a2713aSLionel Sambuc   /// UCNs, etc.
258f4a2713aSLionel Sambuc   static std::string getSpelling(const Token &Tok,
259f4a2713aSLionel Sambuc                                  const SourceManager &SourceMgr,
260f4a2713aSLionel Sambuc                                  const LangOptions &LangOpts,
261*0a6a1f1dSLionel Sambuc                                  bool *Invalid = nullptr);
262f4a2713aSLionel Sambuc 
263f4a2713aSLionel Sambuc   /// getSpelling - This method is used to get the spelling of the
264f4a2713aSLionel Sambuc   /// token at the given source location.  If, as is usually true, it
265f4a2713aSLionel Sambuc   /// is not necessary to copy any data, then the returned string may
266f4a2713aSLionel Sambuc   /// not point into the provided buffer.
267f4a2713aSLionel Sambuc   ///
268f4a2713aSLionel Sambuc   /// This method lexes at the expansion depth of the given
269f4a2713aSLionel Sambuc   /// location and does not jump to the expansion or spelling
270f4a2713aSLionel Sambuc   /// location.
271f4a2713aSLionel Sambuc   static StringRef getSpelling(SourceLocation loc,
272f4a2713aSLionel Sambuc                                SmallVectorImpl<char> &buffer,
273f4a2713aSLionel Sambuc                                const SourceManager &SourceMgr,
274f4a2713aSLionel Sambuc                                const LangOptions &LangOpts,
275*0a6a1f1dSLionel Sambuc                                bool *invalid = nullptr);
276f4a2713aSLionel Sambuc 
277f4a2713aSLionel Sambuc   /// MeasureTokenLength - Relex the token at the specified location and return
278f4a2713aSLionel Sambuc   /// its length in bytes in the input file.  If the token needs cleaning (e.g.
279f4a2713aSLionel Sambuc   /// includes a trigraph or an escaped newline) then this count includes bytes
280f4a2713aSLionel Sambuc   /// that are part of that.
281f4a2713aSLionel Sambuc   static unsigned MeasureTokenLength(SourceLocation Loc,
282f4a2713aSLionel Sambuc                                      const SourceManager &SM,
283f4a2713aSLionel Sambuc                                      const LangOptions &LangOpts);
284f4a2713aSLionel Sambuc 
285f4a2713aSLionel Sambuc   /// \brief Relex the token at the specified location.
286f4a2713aSLionel Sambuc   /// \returns true if there was a failure, false on success.
287f4a2713aSLionel Sambuc   static bool getRawToken(SourceLocation Loc, Token &Result,
288f4a2713aSLionel Sambuc                           const SourceManager &SM,
289f4a2713aSLionel Sambuc                           const LangOptions &LangOpts,
290f4a2713aSLionel Sambuc                           bool IgnoreWhiteSpace = false);
291f4a2713aSLionel Sambuc 
292f4a2713aSLionel Sambuc   /// \brief Given a location anywhere in a source buffer, find the location
293f4a2713aSLionel Sambuc   /// that corresponds to the beginning of the token in which the original
294f4a2713aSLionel Sambuc   /// source location lands.
295f4a2713aSLionel Sambuc   static SourceLocation GetBeginningOfToken(SourceLocation Loc,
296f4a2713aSLionel Sambuc                                             const SourceManager &SM,
297f4a2713aSLionel Sambuc                                             const LangOptions &LangOpts);
298f4a2713aSLionel Sambuc 
299f4a2713aSLionel Sambuc   /// AdvanceToTokenCharacter - If the current SourceLocation specifies a
300f4a2713aSLionel Sambuc   /// location at the start of a token, return a new location that specifies a
301f4a2713aSLionel Sambuc   /// character within the token.  This handles trigraphs and escaped newlines.
302f4a2713aSLionel Sambuc   static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
303f4a2713aSLionel Sambuc                                                 unsigned Character,
304f4a2713aSLionel Sambuc                                                 const SourceManager &SM,
305f4a2713aSLionel Sambuc                                                 const LangOptions &LangOpts);
306f4a2713aSLionel Sambuc 
307f4a2713aSLionel Sambuc   /// \brief Computes the source location just past the end of the
308f4a2713aSLionel Sambuc   /// token at this source location.
309f4a2713aSLionel Sambuc   ///
310f4a2713aSLionel Sambuc   /// This routine can be used to produce a source location that
311f4a2713aSLionel Sambuc   /// points just past the end of the token referenced by \p Loc, and
312f4a2713aSLionel Sambuc   /// is generally used when a diagnostic needs to point just after a
313f4a2713aSLionel Sambuc   /// token where it expected something different than it received. If
314f4a2713aSLionel Sambuc   /// the returned source location would not be meaningful (e.g., if
315f4a2713aSLionel Sambuc   /// it points into a macro), this routine returns an invalid
316f4a2713aSLionel Sambuc   /// source location.
317f4a2713aSLionel Sambuc   ///
318f4a2713aSLionel Sambuc   /// \param Offset an offset from the end of the token, where the source
319f4a2713aSLionel Sambuc   /// location should refer to. The default offset (0) produces a source
320f4a2713aSLionel Sambuc   /// location pointing just past the end of the token; an offset of 1 produces
321f4a2713aSLionel Sambuc   /// a source location pointing to the last character in the token, etc.
322f4a2713aSLionel Sambuc   static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
323f4a2713aSLionel Sambuc                                             const SourceManager &SM,
324f4a2713aSLionel Sambuc                                             const LangOptions &LangOpts);
325f4a2713aSLionel Sambuc 
326f4a2713aSLionel Sambuc   /// \brief Returns true if the given MacroID location points at the first
327f4a2713aSLionel Sambuc   /// token of the macro expansion.
328f4a2713aSLionel Sambuc   ///
329f4a2713aSLionel Sambuc   /// \param MacroBegin If non-null and function returns true, it is set to
330f4a2713aSLionel Sambuc   /// begin location of the macro.
331f4a2713aSLionel Sambuc   static bool isAtStartOfMacroExpansion(SourceLocation loc,
332f4a2713aSLionel Sambuc                                         const SourceManager &SM,
333f4a2713aSLionel Sambuc                                         const LangOptions &LangOpts,
334*0a6a1f1dSLionel Sambuc                                         SourceLocation *MacroBegin = nullptr);
335f4a2713aSLionel Sambuc 
336f4a2713aSLionel Sambuc   /// \brief Returns true if the given MacroID location points at the last
337f4a2713aSLionel Sambuc   /// token of the macro expansion.
338f4a2713aSLionel Sambuc   ///
339f4a2713aSLionel Sambuc   /// \param MacroEnd If non-null and function returns true, it is set to
340f4a2713aSLionel Sambuc   /// end location of the macro.
341f4a2713aSLionel Sambuc   static bool isAtEndOfMacroExpansion(SourceLocation loc,
342f4a2713aSLionel Sambuc                                       const SourceManager &SM,
343f4a2713aSLionel Sambuc                                       const LangOptions &LangOpts,
344*0a6a1f1dSLionel Sambuc                                       SourceLocation *MacroEnd = nullptr);
345f4a2713aSLionel Sambuc 
346f4a2713aSLionel Sambuc   /// \brief Accepts a range and returns a character range with file locations.
347f4a2713aSLionel Sambuc   ///
348f4a2713aSLionel Sambuc   /// Returns a null range if a part of the range resides inside a macro
349f4a2713aSLionel Sambuc   /// expansion or the range does not reside on the same FileID.
350f4a2713aSLionel Sambuc   ///
351f4a2713aSLionel Sambuc   /// This function is trying to deal with macros and return a range based on
352f4a2713aSLionel Sambuc   /// file locations. The cases where it can successfully handle macros are:
353f4a2713aSLionel Sambuc   ///
354f4a2713aSLionel Sambuc   /// -begin or end range lies at the start or end of a macro expansion, in
355f4a2713aSLionel Sambuc   ///  which case the location will be set to the expansion point, e.g:
356f4a2713aSLionel Sambuc   ///    \#define M 1 2
357f4a2713aSLionel Sambuc   ///    a M
358f4a2713aSLionel Sambuc   /// If you have a range [a, 2] (where 2 came from the macro), the function
359f4a2713aSLionel Sambuc   /// will return a range for "a M"
360f4a2713aSLionel Sambuc   /// if you have range [a, 1], the function will fail because the range
361f4a2713aSLionel Sambuc   /// overlaps with only a part of the macro
362f4a2713aSLionel Sambuc   ///
363f4a2713aSLionel Sambuc   /// -The macro is a function macro and the range can be mapped to the macro
364f4a2713aSLionel Sambuc   ///  arguments, e.g:
365f4a2713aSLionel Sambuc   ///    \#define M 1 2
366f4a2713aSLionel Sambuc   ///    \#define FM(x) x
367f4a2713aSLionel Sambuc   ///    FM(a b M)
368f4a2713aSLionel Sambuc   /// if you have range [b, 2], the function will return the file range "b M"
369f4a2713aSLionel Sambuc   /// inside the macro arguments.
370f4a2713aSLionel Sambuc   /// if you have range [a, 2], the function will return the file range
371f4a2713aSLionel Sambuc   /// "FM(a b M)" since the range includes all of the macro expansion.
372f4a2713aSLionel Sambuc   static CharSourceRange makeFileCharRange(CharSourceRange Range,
373f4a2713aSLionel Sambuc                                            const SourceManager &SM,
374f4a2713aSLionel Sambuc                                            const LangOptions &LangOpts);
375f4a2713aSLionel Sambuc 
376f4a2713aSLionel Sambuc   /// \brief Returns a string for the source that the range encompasses.
377f4a2713aSLionel Sambuc   static StringRef getSourceText(CharSourceRange Range,
378f4a2713aSLionel Sambuc                                  const SourceManager &SM,
379f4a2713aSLionel Sambuc                                  const LangOptions &LangOpts,
380*0a6a1f1dSLionel Sambuc                                  bool *Invalid = nullptr);
381f4a2713aSLionel Sambuc 
382f4a2713aSLionel Sambuc   /// \brief Retrieve the name of the immediate macro expansion.
383f4a2713aSLionel Sambuc   ///
384f4a2713aSLionel Sambuc   /// This routine starts from a source location, and finds the name of the macro
385f4a2713aSLionel Sambuc   /// responsible for its immediate expansion. It looks through any intervening
386f4a2713aSLionel Sambuc   /// macro argument expansions to compute this. It returns a StringRef which
387f4a2713aSLionel Sambuc   /// refers to the SourceManager-owned buffer of the source where that macro
388f4a2713aSLionel Sambuc   /// name is spelled. Thus, the result shouldn't out-live that SourceManager.
389f4a2713aSLionel Sambuc   static StringRef getImmediateMacroName(SourceLocation Loc,
390f4a2713aSLionel Sambuc                                          const SourceManager &SM,
391f4a2713aSLionel Sambuc                                          const LangOptions &LangOpts);
392f4a2713aSLionel Sambuc 
393f4a2713aSLionel Sambuc   /// \brief Compute the preamble of the given file.
394f4a2713aSLionel Sambuc   ///
395f4a2713aSLionel Sambuc   /// The preamble of a file contains the initial comments, include directives,
396f4a2713aSLionel Sambuc   /// and other preprocessor directives that occur before the code in this
397f4a2713aSLionel Sambuc   /// particular file actually begins. The preamble of the main source file is
398f4a2713aSLionel Sambuc   /// a potential prefix header.
399f4a2713aSLionel Sambuc   ///
400f4a2713aSLionel Sambuc   /// \param Buffer The memory buffer containing the file's contents.
401f4a2713aSLionel Sambuc   ///
402f4a2713aSLionel Sambuc   /// \param MaxLines If non-zero, restrict the length of the preamble
403f4a2713aSLionel Sambuc   /// to fewer than this number of lines.
404f4a2713aSLionel Sambuc   ///
405f4a2713aSLionel Sambuc   /// \returns The offset into the file where the preamble ends and the rest
406f4a2713aSLionel Sambuc   /// of the file begins along with a boolean value indicating whether
407f4a2713aSLionel Sambuc   /// the preamble ends at the beginning of a new line.
408*0a6a1f1dSLionel Sambuc   static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
409*0a6a1f1dSLionel Sambuc                                                    const LangOptions &LangOpts,
410f4a2713aSLionel Sambuc                                                    unsigned MaxLines = 0);
411f4a2713aSLionel Sambuc 
412f4a2713aSLionel Sambuc   /// \brief Checks that the given token is the first token that occurs after
413f4a2713aSLionel Sambuc   /// the given location (this excludes comments and whitespace). Returns the
414f4a2713aSLionel Sambuc   /// location immediately after the specified token. If the token is not found
415f4a2713aSLionel Sambuc   /// or the location is inside a macro, the returned source location will be
416f4a2713aSLionel Sambuc   /// invalid.
417f4a2713aSLionel Sambuc   static SourceLocation findLocationAfterToken(SourceLocation loc,
418f4a2713aSLionel Sambuc                                          tok::TokenKind TKind,
419f4a2713aSLionel Sambuc                                          const SourceManager &SM,
420f4a2713aSLionel Sambuc                                          const LangOptions &LangOpts,
421f4a2713aSLionel Sambuc                                          bool SkipTrailingWhitespaceAndNewLine);
422f4a2713aSLionel Sambuc 
423f4a2713aSLionel Sambuc   /// \brief Returns true if the given character could appear in an identifier.
424f4a2713aSLionel Sambuc   static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts);
425f4a2713aSLionel Sambuc 
426f4a2713aSLionel Sambuc   /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
427f4a2713aSLionel Sambuc   /// emit a warning.
getCharAndSizeNoWarn(const char * Ptr,unsigned & Size,const LangOptions & LangOpts)428f4a2713aSLionel Sambuc   static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
429f4a2713aSLionel Sambuc                                           const LangOptions &LangOpts) {
430f4a2713aSLionel Sambuc     // If this is not a trigraph and not a UCN or escaped newline, return
431f4a2713aSLionel Sambuc     // quickly.
432f4a2713aSLionel Sambuc     if (isObviouslySimpleCharacter(Ptr[0])) {
433f4a2713aSLionel Sambuc       Size = 1;
434f4a2713aSLionel Sambuc       return *Ptr;
435f4a2713aSLionel Sambuc     }
436f4a2713aSLionel Sambuc 
437f4a2713aSLionel Sambuc     Size = 0;
438f4a2713aSLionel Sambuc     return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
439f4a2713aSLionel Sambuc   }
440f4a2713aSLionel Sambuc 
441f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
442f4a2713aSLionel Sambuc   // Internal implementation interfaces.
443f4a2713aSLionel Sambuc private:
444f4a2713aSLionel Sambuc 
445f4a2713aSLionel Sambuc   /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
446f4a2713aSLionel Sambuc   /// by Lex.
447f4a2713aSLionel Sambuc   ///
  /// NOTE(review): \p Result is presumably the token being filled in and
  /// \p TokAtPhysicalStartOfLine presumably says whether the token begins a
  /// physical source line; the meaning of the bool result is not visible in
  /// this header -- confirm against the definition.
448f4a2713aSLionel Sambuc   bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
449f4a2713aSLionel Sambuc 
  /// NOTE(review): undocumented in this header.  Presumably checks whether the
  /// Unicode character \p C found at \p CurPtr is whitespace while \p Result
  /// is being lexed -- confirm against the definition.
450f4a2713aSLionel Sambuc   bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
451f4a2713aSLionel Sambuc 
452f4a2713aSLionel Sambuc   /// Given that a token begins with the Unicode character \p C, figure out
453f4a2713aSLionel Sambuc   /// what kind of token it is and dispatch to the appropriate lexing helper
454f4a2713aSLionel Sambuc   /// function.
  ///
  /// \param Result The token being lexed.
  /// \param C The Unicode character that begins the token.
  /// \param CurPtr NOTE(review): presumably the buffer position just past
  ///        \p C -- confirm against the definition.
455f4a2713aSLionel Sambuc   bool LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
456f4a2713aSLionel Sambuc 
457f4a2713aSLionel Sambuc   /// FormTokenWithChars - When we lex a token, we have identified a span
458f4a2713aSLionel Sambuc   /// starting at BufferPtr, going to TokEnd that forms the token.  This method
459f4a2713aSLionel Sambuc   /// takes that range and assigns it to the token as its location and size.  In
460f4a2713aSLionel Sambuc   /// addition, since tokens cannot overlap, this also updates BufferPtr to be
461f4a2713aSLionel Sambuc   /// TokEnd.
FormTokenWithChars(Token & Result,const char * TokEnd,tok::TokenKind Kind)462f4a2713aSLionel Sambuc   void FormTokenWithChars(Token &Result, const char *TokEnd,
463f4a2713aSLionel Sambuc                           tok::TokenKind Kind) {
464f4a2713aSLionel Sambuc     unsigned TokLen = TokEnd-BufferPtr;
465f4a2713aSLionel Sambuc     Result.setLength(TokLen);
466f4a2713aSLionel Sambuc     Result.setLocation(getSourceLocation(BufferPtr, TokLen));
467f4a2713aSLionel Sambuc     Result.setKind(Kind);
468f4a2713aSLionel Sambuc     BufferPtr = TokEnd;
469f4a2713aSLionel Sambuc   }
470f4a2713aSLionel Sambuc 
471f4a2713aSLionel Sambuc   /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
472f4a2713aSLionel Sambuc   /// tok::l_paren token, 0 if it is something else and 2 if there are no more
473f4a2713aSLionel Sambuc   /// tokens in the buffer controlled by this lexer.
  ///
  /// Note that the result is a three-valued unsigned, not a bool: callers
  /// must distinguish 2 (no more tokens) from 1 (left paren).
474f4a2713aSLionel Sambuc   unsigned isNextPPTokenLParen();
475f4a2713aSLionel Sambuc 
476f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
477f4a2713aSLionel Sambuc   // Lexer character reading interfaces.
478f4a2713aSLionel Sambuc 
479f4a2713aSLionel Sambuc   // This lexer is built on two interfaces for reading characters, both of which
480f4a2713aSLionel Sambuc   // automatically provide phase 1/2 translation.  getAndAdvanceChar is used
481f4a2713aSLionel Sambuc   // when we know that we will be reading a character from the input buffer and
482f4a2713aSLionel Sambuc   // that this character will be part of the result token. This occurs in (e.g.)
483f4a2713aSLionel Sambuc   // string processing, because we know we need to read until we find the
484f4a2713aSLionel Sambuc   // closing '"' character.
485f4a2713aSLionel Sambuc   //
486f4a2713aSLionel Sambuc   // The second interface is the combination of getCharAndSize with
487f4a2713aSLionel Sambuc   // ConsumeChar.  getCharAndSize reads a phase 1/2 translated character,
488f4a2713aSLionel Sambuc   // returning it and its size.  If the lexer decides that this character is
489f4a2713aSLionel Sambuc   // part of the current token, it calls ConsumeChar on it.  This two-stage
490f4a2713aSLionel Sambuc   // approach allows us to emit diagnostics for characters (e.g. warnings about
491f4a2713aSLionel Sambuc   // trigraphs), knowing that they only are emitted if the character is
492f4a2713aSLionel Sambuc   // consumed.
493f4a2713aSLionel Sambuc 
494f4a2713aSLionel Sambuc   /// isObviouslySimpleCharacter - Return true if the specified character is
495f4a2713aSLionel Sambuc   /// obviously the same in translation phase 1 and translation phase 3.  This
496f4a2713aSLionel Sambuc   /// can return false for characters that end up being the same, but it will
497f4a2713aSLionel Sambuc   /// never return true for something that needs to be mapped.
isObviouslySimpleCharacter(char C)498f4a2713aSLionel Sambuc   static bool isObviouslySimpleCharacter(char C) {
499f4a2713aSLionel Sambuc     return C != '?' && C != '\\';
500f4a2713aSLionel Sambuc   }
501f4a2713aSLionel Sambuc 
502f4a2713aSLionel Sambuc   /// getAndAdvanceChar - Read a single 'character' from the specified buffer,
503f4a2713aSLionel Sambuc   /// advance over it, and return it.  This is tricky in several cases.  Here we
504f4a2713aSLionel Sambuc   /// just handle the trivial case and fall-back to the non-inlined
505f4a2713aSLionel Sambuc   /// getCharAndSizeSlow method to handle the hard case.
getAndAdvanceChar(const char * & Ptr,Token & Tok)506f4a2713aSLionel Sambuc   inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {
507f4a2713aSLionel Sambuc     // If this is not a trigraph and not a UCN or escaped newline, return
508f4a2713aSLionel Sambuc     // quickly.
509f4a2713aSLionel Sambuc     if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;
510f4a2713aSLionel Sambuc 
511f4a2713aSLionel Sambuc     unsigned Size = 0;
512f4a2713aSLionel Sambuc     char C = getCharAndSizeSlow(Ptr, Size, &Tok);
513f4a2713aSLionel Sambuc     Ptr += Size;
514f4a2713aSLionel Sambuc     return C;
515f4a2713aSLionel Sambuc   }
516f4a2713aSLionel Sambuc 
517f4a2713aSLionel Sambuc   /// ConsumeChar - When a character (identified by getCharAndSize) is consumed
518f4a2713aSLionel Sambuc   /// and added to a given token, check to see if there are diagnostics that
519f4a2713aSLionel Sambuc   /// need to be emitted or flags that need to be set on the token.  If so, do
520f4a2713aSLionel Sambuc   /// it.
ConsumeChar(const char * Ptr,unsigned Size,Token & Tok)521f4a2713aSLionel Sambuc   const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {
522f4a2713aSLionel Sambuc     // Normal case, we consumed exactly one token.  Just return it.
523f4a2713aSLionel Sambuc     if (Size == 1)
524f4a2713aSLionel Sambuc       return Ptr+Size;
525f4a2713aSLionel Sambuc 
526f4a2713aSLionel Sambuc     // Otherwise, re-lex the character with a current token, allowing
527f4a2713aSLionel Sambuc     // diagnostics to be emitted and flags to be set.
528f4a2713aSLionel Sambuc     Size = 0;
529f4a2713aSLionel Sambuc     getCharAndSizeSlow(Ptr, Size, &Tok);
530f4a2713aSLionel Sambuc     return Ptr+Size;
531f4a2713aSLionel Sambuc   }
532f4a2713aSLionel Sambuc 
533f4a2713aSLionel Sambuc   /// getCharAndSize - Peek a single 'character' from the specified buffer,
534f4a2713aSLionel Sambuc   /// get its size, and return it.  This is tricky in several cases.  Here we
535f4a2713aSLionel Sambuc   /// just handle the trivial case and fall-back to the non-inlined
536f4a2713aSLionel Sambuc   /// getCharAndSizeSlow method to handle the hard case.
getCharAndSize(const char * Ptr,unsigned & Size)537f4a2713aSLionel Sambuc   inline char getCharAndSize(const char *Ptr, unsigned &Size) {
538f4a2713aSLionel Sambuc     // If this is not a trigraph and not a UCN or escaped newline, return
539f4a2713aSLionel Sambuc     // quickly.
540f4a2713aSLionel Sambuc     if (isObviouslySimpleCharacter(Ptr[0])) {
541f4a2713aSLionel Sambuc       Size = 1;
542f4a2713aSLionel Sambuc       return *Ptr;
543f4a2713aSLionel Sambuc     }
544f4a2713aSLionel Sambuc 
545f4a2713aSLionel Sambuc     Size = 0;
546f4a2713aSLionel Sambuc     return getCharAndSizeSlow(Ptr, Size);
547f4a2713aSLionel Sambuc   }
548f4a2713aSLionel Sambuc 
549f4a2713aSLionel Sambuc   /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize
550f4a2713aSLionel Sambuc   /// method.
  ///
  /// \param Size Callers in this class set it to 0 before the call and read
  ///        the character's size from it afterwards.
  /// \param Tok Optional (defaults to nullptr).  getAndAdvanceChar and
  ///        ConsumeChar pass the current token; getCharAndSize passes
  ///        nothing.
551*0a6a1f1dSLionel Sambuc   char getCharAndSizeSlow(const char *Ptr, unsigned &Size,
552*0a6a1f1dSLionel Sambuc                           Token *Tok = nullptr);
553f4a2713aSLionel Sambuc 
554f4a2713aSLionel Sambuc   /// getEscapedNewLineSize - Return the size of the specified escaped newline,
555f4a2713aSLionel Sambuc   /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry
556f4a2713aSLionel Sambuc   /// to this function.
  ///
  /// \param P Points just past the backslash (see the P[-1] precondition).
557f4a2713aSLionel Sambuc   static unsigned getEscapedNewLineSize(const char *P);
558f4a2713aSLionel Sambuc 
559f4a2713aSLionel Sambuc   /// SkipEscapedNewLines - If P points to an escaped newline (or a series of
560f4a2713aSLionel Sambuc   /// them), skip over them and return the first non-escaped-newline found,
561f4a2713aSLionel Sambuc   /// otherwise return P.
  ///
  /// \return A pointer into the same buffer as \p P; equal to \p P when
  ///         nothing was skipped.
562f4a2713aSLionel Sambuc   static const char *SkipEscapedNewLines(const char *P);
563f4a2713aSLionel Sambuc 
564f4a2713aSLionel Sambuc   /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
565f4a2713aSLionel Sambuc   /// diagnostic.
  ///
  /// This is a static member, so the language options are passed explicitly
  /// as \p LangOpts.  getCharAndSizeNoWarn calls it, with \p Size preset to
  /// 0, for characters that are not obviously simple.
566f4a2713aSLionel Sambuc   static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
567f4a2713aSLionel Sambuc                                        const LangOptions &LangOpts);
568f4a2713aSLionel Sambuc 
569f4a2713aSLionel Sambuc   //===--------------------------------------------------------------------===//
570f4a2713aSLionel Sambuc   // Other lexer functions.
571f4a2713aSLionel Sambuc 
  /// NOTE(review): undocumented in this header.  Presumably skips \p Bytes
  /// bytes of the buffer; the role of \p StartOfLine is not visible here --
  /// confirm against the definition.
572f4a2713aSLionel Sambuc   void SkipBytes(unsigned Bytes, bool StartOfLine);
573f4a2713aSLionel Sambuc 
  /// NOTE(review): undocumented in this header.  Presumably transfers
  /// line-start / leading-space state onto \p Result -- confirm against the
  /// definition.
574f4a2713aSLionel Sambuc   void PropagateLineStartLeadingSpaceInfo(Token &Result);
575f4a2713aSLionel Sambuc 
  /// NOTE(review): undocumented in this header.  Presumably lexes a
  /// user-defined literal suffix at \p CurPtr and returns the updated buffer
  /// position -- confirm against the definition.
576f4a2713aSLionel Sambuc   const char *LexUDSuffix(Token &Result, const char *CurPtr,
577f4a2713aSLionel Sambuc                           bool IsStringLiteral);
578f4a2713aSLionel Sambuc 
579f4a2713aSLionel Sambuc   // Helper functions to lex the remainder of a token of the specific type.
  // All of them take the token being worked on (Result) and a position in the
  // buffer (CurPtr).  The string, raw-string and character-constant helpers
  // also take the token kind, and the Skip* helpers take
  // TokAtPhysicalStartOfLine by non-const reference.
  // NOTE(review): the meaning of the bool results is not visible in this
  // header -- confirm against the definitions.
580f4a2713aSLionel Sambuc   bool LexIdentifier         (Token &Result, const char *CurPtr);
581f4a2713aSLionel Sambuc   bool LexNumericConstant    (Token &Result, const char *CurPtr);
582f4a2713aSLionel Sambuc   bool LexStringLiteral      (Token &Result, const char *CurPtr,
583f4a2713aSLionel Sambuc                               tok::TokenKind Kind);
584f4a2713aSLionel Sambuc   bool LexRawStringLiteral   (Token &Result, const char *CurPtr,
585f4a2713aSLionel Sambuc                               tok::TokenKind Kind);
586f4a2713aSLionel Sambuc   bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
587f4a2713aSLionel Sambuc   bool LexCharConstant       (Token &Result, const char *CurPtr,
588f4a2713aSLionel Sambuc                               tok::TokenKind Kind);
589f4a2713aSLionel Sambuc   bool LexEndOfFile          (Token &Result, const char *CurPtr);
590f4a2713aSLionel Sambuc   bool SkipWhitespace        (Token &Result, const char *CurPtr,
591f4a2713aSLionel Sambuc                               bool &TokAtPhysicalStartOfLine);
592f4a2713aSLionel Sambuc   bool SkipLineComment       (Token &Result, const char *CurPtr,
593f4a2713aSLionel Sambuc                               bool &TokAtPhysicalStartOfLine);
594f4a2713aSLionel Sambuc   bool SkipBlockComment      (Token &Result, const char *CurPtr,
595f4a2713aSLionel Sambuc                               bool &TokAtPhysicalStartOfLine);
596f4a2713aSLionel Sambuc   bool SaveLineComment       (Token &Result, const char *CurPtr);
597f4a2713aSLionel Sambuc 
  // Conflict-marker recovery helpers (see ConflictMarkerKind at the top of
  // this header).
  // NOTE(review): presumably these detect the start and the end of a
  // version-control conflict marker at CurPtr -- confirm against the
  // definitions.
598f4a2713aSLionel Sambuc   bool IsStartOfConflictMarker(const char *CurPtr);
599f4a2713aSLionel Sambuc   bool HandleEndOfConflictMarker(const char *CurPtr);
600f4a2713aSLionel Sambuc 
  // NOTE(review): presumably reports whether CurPtr is the position at which
  // code completion was requested -- confirm against the definition.  Being
  // a const member, it does not modify the lexer.
601f4a2713aSLionel Sambuc   bool isCodeCompletionPoint(const char *CurPtr) const;
cutOffLexing()602f4a2713aSLionel Sambuc   void cutOffLexing() { BufferPtr = BufferEnd; }
603f4a2713aSLionel Sambuc 
  /// NOTE(review): undocumented in this header.  Presumably reports whether
  /// the text at \p Start begins a hexadecimal literal under \p LangOpts --
  /// confirm against the definition.
604f4a2713aSLionel Sambuc   bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
605f4a2713aSLionel Sambuc 
606f4a2713aSLionel Sambuc 
607f4a2713aSLionel Sambuc   /// Read a universal character name.
608f4a2713aSLionel Sambuc   ///
609f4a2713aSLionel Sambuc   /// \param CurPtr The position in the source buffer after the initial '\'.
610f4a2713aSLionel Sambuc   ///               If the UCN is syntactically well-formed (but not necessarily
611f4a2713aSLionel Sambuc   ///               valid), this parameter will be updated to point to the
612f4a2713aSLionel Sambuc   ///               character after the UCN.
613f4a2713aSLionel Sambuc   /// \param SlashLoc The position in the source buffer of the '\'.
614f4a2713aSLionel Sambuc   /// \param Tok The token being formed. Pass \c nullptr to suppress diagnostics
615f4a2713aSLionel Sambuc   ///            and handle token formation in the caller.
616f4a2713aSLionel Sambuc   ///
617f4a2713aSLionel Sambuc   /// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
618f4a2713aSLionel Sambuc   ///         invalid.
619f4a2713aSLionel Sambuc   uint32_t tryReadUCN(const char *&CurPtr, const char *SlashLoc, Token *Tok);
620f4a2713aSLionel Sambuc 
621*0a6a1f1dSLionel Sambuc   /// \brief Try to consume a UCN as part of an identifier at the current
622*0a6a1f1dSLionel Sambuc   /// location.
623*0a6a1f1dSLionel Sambuc   /// \param CurPtr Initially points to the range of characters in the source
624*0a6a1f1dSLionel Sambuc   ///               buffer containing the '\'. Updated to point past the end of
625*0a6a1f1dSLionel Sambuc   ///               the UCN on success.
626*0a6a1f1dSLionel Sambuc   /// \param Size The number of characters occupied by the '\' (including
627*0a6a1f1dSLionel Sambuc   ///             trigraphs and escaped newlines).
628*0a6a1f1dSLionel Sambuc   /// \param Result The token being produced. Marked as containing a UCN on
629*0a6a1f1dSLionel Sambuc   ///               success. Taken by reference, so it cannot be omitted.
630*0a6a1f1dSLionel Sambuc   /// \return \c true if a UCN was lexed and it produced an acceptable
631*0a6a1f1dSLionel Sambuc   ///         identifier character, \c false otherwise.
632*0a6a1f1dSLionel Sambuc   bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
633*0a6a1f1dSLionel Sambuc                                Token &Result);
634*0a6a1f1dSLionel Sambuc 
635*0a6a1f1dSLionel Sambuc   /// \brief Try to consume an identifier character encoded in UTF-8.
636*0a6a1f1dSLionel Sambuc   /// \param CurPtr Points to the start of the (potential) UTF-8 code unit
637*0a6a1f1dSLionel Sambuc   ///        sequence. On success, updated to point past the end of it.
638*0a6a1f1dSLionel Sambuc   /// \return \c true if a UTF-8 sequence mapping to an acceptable identifier
639*0a6a1f1dSLionel Sambuc   ///         character was lexed, \c false otherwise.
  ///
  /// This overload of the identifier helpers takes no Token argument.
640*0a6a1f1dSLionel Sambuc   bool tryConsumeIdentifierUTF8Char(const char *&CurPtr);
641*0a6a1f1dSLionel Sambuc };
642f4a2713aSLionel Sambuc 
643f4a2713aSLionel Sambuc }  // end namespace clang
644f4a2713aSLionel Sambuc 
645f4a2713aSLionel Sambuc #endif
646