1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/Module.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/HeaderSearch.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/Token.h"
33 #include "clang/Lex/TokenLexer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/FoldingSet.h"
37 #include "llvm/ADT/FunctionExtras.h"
38 #include "llvm/ADT/None.h"
39 #include "llvm/ADT/Optional.h"
40 #include "llvm/ADT/PointerUnion.h"
41 #include "llvm/ADT/STLExtras.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/ADT/TinyPtrVector.h"
46 #include "llvm/ADT/iterator_range.h"
47 #include "llvm/Support/Allocator.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/Registry.h"
50 #include <cassert>
51 #include <cstddef>
52 #include <cstdint>
53 #include <map>
54 #include <memory>
55 #include <string>
56 #include <utility>
57 #include <vector>
58 
59 namespace llvm {
60 
61 template<unsigned InternalLen> class SmallString;
62 
63 } // namespace llvm
64 
65 namespace clang {
66 
67 class CodeCompletionHandler;
68 class CommentHandler;
69 class DirectoryEntry;
70 class EmptylineHandler;
71 class ExternalPreprocessorSource;
72 class FileEntry;
73 class FileManager;
74 class HeaderSearch;
75 class MacroArgs;
76 class PragmaHandler;
77 class PragmaNamespace;
78 class PreprocessingRecord;
79 class PreprocessorLexer;
80 class PreprocessorOptions;
81 class ScratchBuffer;
82 class TargetInfo;
83 
84 namespace Builtin {
85 class Context;
86 }
87 
88 /// Stores token information for comparing actual tokens with
89 /// predefined values.  Only handles simple tokens and identifiers.
90 class TokenValue {
91   tok::TokenKind Kind;
92   IdentifierInfo *II;
93 
94 public:
95   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
96     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
97     assert(Kind != tok::identifier &&
98            "Identifiers should be created by TokenValue(IdentifierInfo *)");
99     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
100     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
101   }
102 
103   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
104 
105   bool operator==(const Token &Tok) const {
106     return Tok.getKind() == Kind &&
107         (!II || II == Tok.getIdentifierInfo());
108   }
109 };
110 
/// Context in which macro name is used.
enum MacroUse {
  /// The name is used somewhere other than in a \#define or \#undef.
  MU_Other = 0,

  /// The name is the one specified in a \#define.
  MU_Define = 1,

  /// The name is the one specified in an \#undef.
  MU_Undef = 2
};
122 
123 /// Engages in a tight little dance with the lexer to efficiently
124 /// preprocess tokens.
125 ///
126 /// Lexers know only about tokens within a single source file, and don't
127 /// know anything about preprocessor-level issues like the \#include stack,
128 /// token expansion, etc.
129 class Preprocessor {
130   friend class VAOptDefinitionContext;
131   friend class VariadicMacroScopeGuard;
132 
133   llvm::unique_function<void(const clang::Token &)> OnToken;
134   std::shared_ptr<PreprocessorOptions> PPOpts;
135   DiagnosticsEngine        *Diags;
136   LangOptions       &LangOpts;
137   const TargetInfo *Target = nullptr;
138   const TargetInfo *AuxTarget = nullptr;
139   FileManager       &FileMgr;
140   SourceManager     &SourceMgr;
141   std::unique_ptr<ScratchBuffer> ScratchBuf;
142   HeaderSearch      &HeaderInfo;
143   ModuleLoader      &TheModuleLoader;
144 
145   /// External source of macros.
146   ExternalPreprocessorSource *ExternalSource;
147 
148   /// A BumpPtrAllocator object used to quickly allocate and release
149   /// objects internal to the Preprocessor.
150   llvm::BumpPtrAllocator BP;
151 
  /// Identifiers for builtin macros and other builtins.
  ///
  /// The comment next to each member gives the source spelling that the
  /// member's name stands for.
  IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
  IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
  IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
  IdentifierInfo *Ident__identifier;               // __identifier
  IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
  IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
  IdentifierInfo *Ident__has_feature;              // __has_feature
  IdentifierInfo *Ident__has_extension;            // __has_extension
  IdentifierInfo *Ident__has_builtin;              // __has_builtin
  IdentifierInfo *Ident__has_attribute;            // __has_attribute
  IdentifierInfo *Ident__has_include;              // __has_include
  IdentifierInfo *Ident__has_include_next;         // __has_include_next
  IdentifierInfo *Ident__has_warning;              // __has_warning
  IdentifierInfo *Ident__is_identifier;            // __is_identifier
  IdentifierInfo *Ident__building_module;          // __building_module
  IdentifierInfo *Ident__MODULE__;                 // __MODULE__
  IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
  IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
  IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
  IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
  IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
  IdentifierInfo *Ident__is_target_os;             // __is_target_os
  IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
  IdentifierInfo *Ident__is_target_variant_os;     // __is_target_variant_os
  // __is_target_variant_environment
  IdentifierInfo *Ident__is_target_variant_environment;
  IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD__
184 
185   // Weak, only valid (and set) while InMacroArgs is true.
186   Token* ArgMacro;
187 
188   SourceLocation DATELoc, TIMELoc;
189 
190   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
191   // not specified on the command line. The target is queried to set the
192   // default evaluation method.
193   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
194       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
195 
  // Keeps the value of the last evaluation method before a
  // `pragma float_control (precise,off)` is applied.
198   LangOptions::FPEvalMethodKind LastFPEvalMethod =
199       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
200 
  // The most recent pragma location where the floating point evaluation
  // method was modified. This is used to determine whether the
  // 'pragma clang fp eval_method' was used within the current scope.
204   SourceLocation LastFPEvalPragmaLocation;
205 
206   LangOptions::FPEvalMethodKind TUFPEvalMethod =
207       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
208 
209   // Next __COUNTER__ value, starts at 0.
210   unsigned CounterValue = 0;
211 
212   enum {
213     /// Maximum depth of \#includes.
214     MaxAllowedIncludeStackDepth = 200
215   };
216 
217   // State that is set before the preprocessor begins.
218   bool KeepComments : 1;
219   bool KeepMacroComments : 1;
220   bool SuppressIncludeNotFoundError : 1;
221 
222   // State that changes while the preprocessor runs:
223   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
224 
225   /// Whether the preprocessor owns the header search object.
226   bool OwnsHeaderSearch : 1;
227 
228   /// True if macro expansion is disabled.
229   bool DisableMacroExpansion : 1;
230 
231   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
232   /// when parsing preprocessor directives.
233   bool MacroExpansionInDirectivesOverride : 1;
234 
235   class ResetMacroExpansionHelper;
236 
237   /// Whether we have already loaded macros from the external source.
238   mutable bool ReadMacrosFromExternalSource : 1;
239 
240   /// True if pragmas are enabled.
241   bool PragmasEnabled : 1;
242 
243   /// True if the current build action is a preprocessing action.
244   bool PreprocessedOutput : 1;
245 
246   /// True if we are currently preprocessing a #if or #elif directive
247   bool ParsingIfOrElifDirective;
248 
249   /// True if we are pre-expanding macro arguments.
250   bool InMacroArgPreExpansion;
251 
252   /// Mapping/lookup information for all identifiers in
253   /// the program, including program keywords.
254   mutable IdentifierTable Identifiers;
255 
256   /// This table contains all the selectors in the program.
257   ///
  /// Unlike IdentifierTable above, this table *isn't* populated by the
  /// preprocessor. It is declared/expanded here because its role/lifetime is
  /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put it here.
262   ///
263   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
264   /// the lifetime of the preprocessor.
265   SelectorTable Selectors;
266 
267   /// Information about builtins.
268   std::unique_ptr<Builtin::Context> BuiltinInfo;
269 
270   /// Tracks all of the pragmas that the client registered
271   /// with this preprocessor.
272   std::unique_ptr<PragmaNamespace> PragmaHandlers;
273 
  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
276   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
277 
278   /// Tracks all of the comment handlers that the client registered
279   /// with this preprocessor.
280   std::vector<CommentHandler *> CommentHandlers;
281 
282   /// Empty line handler.
283   EmptylineHandler *Emptyline = nullptr;
284 
  /// True if we want to ignore the EOF token and continue later on (thus
  /// avoiding tearing down the Lexer, etc.).
287   bool IncrementalProcessing = false;
288 
289 public:
290   /// The kind of translation unit we are processing.
291   const TranslationUnitKind TUKind;
292 
293 private:
294   /// The code-completion handler.
295   CodeCompletionHandler *CodeComplete = nullptr;
296 
297   /// The file that we're performing code-completion for, if any.
298   const FileEntry *CodeCompletionFile = nullptr;
299 
300   /// The offset in file for the code-completion point.
301   unsigned CodeCompletionOffset = 0;
302 
303   /// The location for the code-completion point. This gets instantiated
304   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
305   SourceLocation CodeCompletionLoc;
306 
307   /// The start location for the file of the code-completion point.
308   ///
309   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
310   /// for preprocessing.
311   SourceLocation CodeCompletionFileLoc;
312 
313   /// The source location of the \c import contextual keyword we just
314   /// lexed, if any.
315   SourceLocation ModuleImportLoc;
316 
317   /// The module import path that we're currently processing.
318   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
319 
320   /// Whether the last token we lexed was an '@'.
321   bool LastTokenWasAt = false;
322 
323   /// A position within a C++20 import-seq.
324   class ImportSeq {
325   public:
326     enum State : int {
327       // Positive values represent a number of unclosed brackets.
328       AtTopLevel = 0,
329       AfterTopLevelTokenSeq = -1,
330       AfterExport = -2,
331       AfterImportSeq = -3,
332     };
333 
334     ImportSeq(State S) : S(S) {}
335 
336     /// Saw any kind of open bracket.
337     void handleOpenBracket() {
338       S = static_cast<State>(std::max<int>(S, 0) + 1);
339     }
340     /// Saw any kind of close bracket other than '}'.
341     void handleCloseBracket() {
342       S = static_cast<State>(std::max<int>(S, 1) - 1);
343     }
344     /// Saw a close brace.
345     void handleCloseBrace() {
346       handleCloseBracket();
347       if (S == AtTopLevel && !AfterHeaderName)
348         S = AfterTopLevelTokenSeq;
349     }
350     /// Saw a semicolon.
351     void handleSemi() {
352       if (atTopLevel()) {
353         S = AfterTopLevelTokenSeq;
354         AfterHeaderName = false;
355       }
356     }
357 
358     /// Saw an 'export' identifier.
359     void handleExport() {
360       if (S == AfterTopLevelTokenSeq)
361         S = AfterExport;
362       else if (S <= 0)
363         S = AtTopLevel;
364     }
365     /// Saw an 'import' identifier.
366     void handleImport() {
367       if (S == AfterTopLevelTokenSeq || S == AfterExport)
368         S = AfterImportSeq;
369       else if (S <= 0)
370         S = AtTopLevel;
371     }
372 
373     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
374     /// until we reach a top-level semicolon.
375     void handleHeaderName() {
376       if (S == AfterImportSeq)
377         AfterHeaderName = true;
378       handleMisc();
379     }
380 
381     /// Saw any other token.
382     void handleMisc() {
383       if (S <= 0)
384         S = AtTopLevel;
385     }
386 
387     bool atTopLevel() { return S <= 0; }
388     bool afterImportSeq() { return S == AfterImportSeq; }
389     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
390 
391   private:
392     State S;
393     /// Whether we're in the pp-import-suffix following the header-name in a
394     /// pp-import. If so, a close-brace is not sufficient to end the
395     /// top-level-token-seq of an import-seq.
396     bool AfterHeaderName = false;
397   };
398 
399   /// Our current position within a C++20 import-seq.
400   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
401 
402   /// Track whether we are in a Global Module Fragment
403   class TrackGMF {
404   public:
405     enum GMFState : int {
406       GMFActive = 1,
407       MaybeGMF = 0,
408       BeforeGMFIntroducer = -1,
409       GMFAbsentOrEnded = -2,
410     };
411 
412     TrackGMF(GMFState S) : S(S) {}
413 
414     /// Saw a semicolon.
415     void handleSemi() {
416       // If it is immediately after the first instance of the module keyword,
417       // then that introduces the GMF.
418       if (S == MaybeGMF)
419         S = GMFActive;
420     }
421 
422     /// Saw an 'export' identifier.
423     void handleExport() {
424       // The presence of an 'export' keyword always ends or excludes a GMF.
425       S = GMFAbsentOrEnded;
426     }
427 
428     /// Saw an 'import' identifier.
429     void handleImport(bool AfterTopLevelTokenSeq) {
430       // If we see this before any 'module' kw, then we have no GMF.
431       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
432         S = GMFAbsentOrEnded;
433     }
434 
435     /// Saw a 'module' identifier.
436     void handleModule(bool AfterTopLevelTokenSeq) {
437       // This was the first module identifier and not preceded by any token
438       // that would exclude a GMF.  It could begin a GMF, but only if directly
439       // followed by a semicolon.
440       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
441         S = MaybeGMF;
442       else
443         S = GMFAbsentOrEnded;
444     }
445 
446     /// Saw any other token.
447     void handleMisc() {
448       // We saw something other than ; after the 'module' kw, so not a GMF.
449       if (S == MaybeGMF)
450         S = GMFAbsentOrEnded;
451     }
452 
453     bool inGMF() { return S == GMFActive; }
454 
455   private:
456     /// Track the transitions into and out of a Global Module Fragment,
457     /// if one is present.
458     GMFState S;
459   };
460 
461   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
462 
463   /// Whether the module import expects an identifier next. Otherwise,
464   /// it expects a '.' or ';'.
465   bool ModuleImportExpectsIdentifier = false;
466 
467   /// The identifier and source location of the currently-active
468   /// \#pragma clang arc_cf_code_audited begin.
469   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
470 
471   /// The source location of the currently-active
472   /// \#pragma clang assume_nonnull begin.
473   SourceLocation PragmaAssumeNonNullLoc;
474 
475   /// Set only for preambles which end with an active
476   /// \#pragma clang assume_nonnull begin.
477   ///
478   /// When the preamble is loaded into the main file,
479   /// `PragmaAssumeNonNullLoc` will be set to this to
480   /// replay the unterminated assume_nonnull.
481   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
482 
483   /// True if we hit the code-completion point.
484   bool CodeCompletionReached = false;
485 
486   /// The code completion token containing the information
487   /// on the stem that is to be code completed.
488   IdentifierInfo *CodeCompletionII = nullptr;
489 
490   /// Range for the code completion token.
491   SourceRange CodeCompletionTokenRange;
492 
493   /// The directory that the main file should be considered to occupy,
494   /// if it does not correspond to a real file (as happens when building a
495   /// module).
496   const DirectoryEntry *MainFileDir = nullptr;
497 
498   /// The number of bytes that we will initially skip when entering the
499   /// main file, along with a flag that indicates whether skipping this number
500   /// of bytes will place the lexer at the start of a line.
501   ///
502   /// This is used when loading a precompiled preamble.
503   std::pair<int, bool> SkipMainFilePreamble;
504 
  /// Whether we hit an error due to reaching max allowed include depth. This
  /// allows us to avoid hitting the same error over and over again.
507   bool HasReachedMaxIncludeDepth = false;
508 
509   /// The number of currently-active calls to Lex.
510   ///
511   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
512   /// require asking for multiple additional tokens. This counter makes it
513   /// possible for Lex to detect whether it's producing a token for the end
514   /// of phase 4 of translation or for some other situation.
515   unsigned LexLevel = 0;
516 
517   /// The number of (LexLevel 0) preprocessor tokens.
518   unsigned TokenCount = 0;
519 
520   /// Preprocess every token regardless of LexLevel.
521   bool PreprocessToken = false;
522 
523   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
524   /// warning, or zero for unlimited.
525   unsigned MaxTokens = 0;
526   SourceLocation MaxTokensOverrideLoc;
527 
528 public:
  /// A record of three source locations and two flags that are captured
  /// together.
  ///
  /// PreambleConditionalStackStore keeps an optional instance of this record;
  /// whether one is present is what its reachedEOFWhileSkipping() reports.
  struct PreambleSkipInfo {
    SourceLocation HashTokenLoc;
    SourceLocation IfTokenLoc;
    bool FoundNonSkipPortion;
    bool FoundElse;
    SourceLocation ElseLoc;

    /// Initialize every field from the argument of the same name.
    PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
                     bool FoundNonSkipPortion, bool FoundElse,
                     SourceLocation ElseLoc)
        : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
          FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
          ElseLoc(ElseLoc) {}
  };
543 
544   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
545 
546 private:
547   friend class ASTReader;
548   friend class MacroArgs;
549 
550   class PreambleConditionalStackStore {
551     enum State {
552       Off = 0,
553       Recording = 1,
554       Replaying = 2,
555     };
556 
557   public:
558     PreambleConditionalStackStore() = default;
559 
560     void startRecording() { ConditionalStackState = Recording; }
561     void startReplaying() { ConditionalStackState = Replaying; }
562     bool isRecording() const { return ConditionalStackState == Recording; }
563     bool isReplaying() const { return ConditionalStackState == Replaying; }
564 
565     ArrayRef<PPConditionalInfo> getStack() const {
566       return ConditionalStack;
567     }
568 
569     void doneReplaying() {
570       ConditionalStack.clear();
571       ConditionalStackState = Off;
572     }
573 
574     void setStack(ArrayRef<PPConditionalInfo> s) {
575       if (!isRecording() && !isReplaying())
576         return;
577       ConditionalStack.clear();
578       ConditionalStack.append(s.begin(), s.end());
579     }
580 
581     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
582 
583     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
584 
585     void clearSkipInfo() { SkipInfo.reset(); }
586 
587     llvm::Optional<PreambleSkipInfo> SkipInfo;
588 
589   private:
590     SmallVector<PPConditionalInfo, 4> ConditionalStack;
591     State ConditionalStackState = Off;
592   } PreambleConditionalStack;
593 
594   /// The current top of the stack that we're lexing from if
595   /// not expanding a macro and we are lexing directly from source code.
596   ///
597   /// Only one of CurLexer, or CurTokenLexer will be non-null.
598   std::unique_ptr<Lexer> CurLexer;
599 
  /// The current top of the stack that we're lexing from
  /// if not expanding a macro.
602   ///
603   /// This is an alias for CurLexer.
604   PreprocessorLexer *CurPPLexer = nullptr;
605 
606   /// Used to find the current FileEntry, if CurLexer is non-null
607   /// and if applicable.
608   ///
609   /// This allows us to implement \#include_next and find directory-specific
610   /// properties.
611   ConstSearchDirIterator CurDirLookup = nullptr;
612 
613   /// The current macro we are expanding, if we are expanding a macro.
614   ///
615   /// One of CurLexer and CurTokenLexer must be null.
616   std::unique_ptr<TokenLexer> CurTokenLexer;
617 
  /// The kind of lexer we're currently working with.
  ///
  /// This declaration introduces both the enumeration and a data member of
  /// the same name; the member starts out as CLK_Lexer.
  enum CurLexerKind {
    CLK_Lexer,
    CLK_TokenLexer,
    CLK_CachingLexer,
    CLK_DependencyDirectivesLexer,
    CLK_LexAfterModuleImport
  } CurLexerKind = CLK_Lexer;
626 
627   /// If the current lexer is for a submodule that is being built, this
628   /// is that submodule.
629   Module *CurLexerSubmodule = nullptr;
630 
631   /// Keeps track of the stack of files currently
632   /// \#included, and macros currently being expanded from, not counting
633   /// CurLexer/CurTokenLexer.
634   struct IncludeStackInfo {
635     enum CurLexerKind           CurLexerKind;
636     Module                     *TheSubmodule;
637     std::unique_ptr<Lexer>      TheLexer;
638     PreprocessorLexer          *ThePPLexer;
639     std::unique_ptr<TokenLexer> TheTokenLexer;
640     ConstSearchDirIterator      TheDirLookup;
641 
642     // The following constructors are completely useless copies of the default
643     // versions, only needed to pacify MSVC.
644     IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
645                      std::unique_ptr<Lexer> &&TheLexer,
646                      PreprocessorLexer *ThePPLexer,
647                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
648                      ConstSearchDirIterator TheDirLookup)
649         : CurLexerKind(std::move(CurLexerKind)),
650           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
651           ThePPLexer(std::move(ThePPLexer)),
652           TheTokenLexer(std::move(TheTokenLexer)),
653           TheDirLookup(std::move(TheDirLookup)) {}
654   };
655   std::vector<IncludeStackInfo> IncludeMacroStack;
656 
657   /// Actions invoked when some preprocessor activity is
658   /// encountered (e.g. a file is \#included, etc).
659   std::unique_ptr<PPCallbacks> Callbacks;
660 
  /// A token, a macro definition and a source range stored together.
  ///
  /// Entries of this type are queued in DelayedMacroExpandsCallbacks.
  struct MacroExpandsInfo {
    Token Tok;
    MacroDefinition MD;
    SourceRange Range;

    /// Store copies of the three arguments in the like-named fields.
    MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
        : Tok(Tok), MD(MD), Range(Range) {}
  };
669   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
670 
  /// Information about a name that has been used to define a module macro.
  struct ModuleMacroInfo {
    /// The most recent macro directive for this identifier.
    MacroDirective *MD;

    /// The active module macros for this identifier.
    llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;

    /// The generation number at which we last updated ActiveModuleMacros.
    /// \see Preprocessor::VisibleModules.
    unsigned ActiveModuleMacrosGeneration = 0;

    /// Whether this macro name is ambiguous.
    bool IsAmbiguous = false;

    /// The module macros that are overridden by this macro.
    llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;

    /// Record \p MD as the most recent directive. All other fields keep their
    /// defaults: empty macro lists, generation 0 and not ambiguous.
    ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
  };
691 
692   /// The state of a macro for an identifier.
693   class MacroState {
694     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
695 
696     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
697                                    const IdentifierInfo *II) const {
698       if (II->isOutOfDate())
699         PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
700       // FIXME: Find a spare bit on IdentifierInfo and store a
701       //        HasModuleMacros flag.
702       if (!II->hasMacroDefinition() ||
703           (!PP.getLangOpts().Modules &&
704            !PP.getLangOpts().ModulesLocalVisibility) ||
705           !PP.CurSubmoduleState->VisibleModules.getGeneration())
706         return nullptr;
707 
708       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
709       if (!Info) {
710         Info = new (PP.getPreprocessorAllocator())
711             ModuleMacroInfo(State.get<MacroDirective *>());
712         State = Info;
713       }
714 
715       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
716           Info->ActiveModuleMacrosGeneration)
717         PP.updateModuleMacroInfo(II, *Info);
718       return Info;
719     }
720 
721   public:
722     MacroState() : MacroState(nullptr) {}
723     MacroState(MacroDirective *MD) : State(MD) {}
724 
725     MacroState(MacroState &&O) noexcept : State(O.State) {
726       O.State = (MacroDirective *)nullptr;
727     }
728 
729     MacroState &operator=(MacroState &&O) noexcept {
730       auto S = O.State;
731       O.State = (MacroDirective *)nullptr;
732       State = S;
733       return *this;
734     }
735 
736     ~MacroState() {
737       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
738         Info->~ModuleMacroInfo();
739     }
740 
741     MacroDirective *getLatest() const {
742       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
743         return Info->MD;
744       return State.get<MacroDirective*>();
745     }
746 
747     void setLatest(MacroDirective *MD) {
748       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
749         Info->MD = MD;
750       else
751         State = MD;
752     }
753 
754     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
755       auto *Info = getModuleInfo(PP, II);
756       return Info ? Info->IsAmbiguous : false;
757     }
758 
759     ArrayRef<ModuleMacro *>
760     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
761       if (auto *Info = getModuleInfo(PP, II))
762         return Info->ActiveModuleMacros;
763       return None;
764     }
765 
766     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
767                                                SourceManager &SourceMgr) const {
768       // FIXME: Incorporate module macros into the result of this.
769       if (auto *Latest = getLatest())
770         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
771       return {};
772     }
773 
774     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
775       if (auto *Info = getModuleInfo(PP, II)) {
776         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
777                                       Info->ActiveModuleMacros.begin(),
778                                       Info->ActiveModuleMacros.end());
779         Info->ActiveModuleMacros.clear();
780         Info->IsAmbiguous = false;
781       }
782     }
783 
784     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
785       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
786         return Info->OverriddenMacros;
787       return None;
788     }
789 
790     void setOverriddenMacros(Preprocessor &PP,
791                              ArrayRef<ModuleMacro *> Overrides) {
792       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
793       if (!Info) {
794         if (Overrides.empty())
795           return;
796         Info = new (PP.getPreprocessorAllocator())
797             ModuleMacroInfo(State.get<MacroDirective *>());
798         State = Info;
799       }
800       Info->OverriddenMacros.clear();
801       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
802                                     Overrides.begin(), Overrides.end());
803       Info->ActiveModuleMacrosGeneration = 0;
804     }
805   };
806 
807   /// For each IdentifierInfo that was associated with a macro, we
808   /// keep a mapping to the history of all macro definitions and #undefs in
809   /// the reverse order (the latest one is in the head of the list).
810   ///
811   /// This mapping lives within the \p CurSubmoduleState.
812   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
813 
814   struct SubmoduleState;
815 
  /// Information about a submodule that we're currently building.
  ///
  /// Instances are kept in BuildingSubmoduleStack.
  struct BuildingSubmoduleInfo {
    /// The module that we are building.
    Module *M;

    /// The location at which the module was included.
    SourceLocation ImportLoc;

    /// Whether we entered this submodule via a pragma.
    bool IsPragma;

    /// The previous SubmoduleState.
    SubmoduleState *OuterSubmoduleState;

    /// The number of pending module macro names when we started building this.
    unsigned OuterPendingModuleMacroNames;

    /// Initialize every field from the argument of the same name.
    BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
                          SubmoduleState *OuterSubmoduleState,
                          unsigned OuterPendingModuleMacroNames)
        : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
          OuterSubmoduleState(OuterSubmoduleState),
          OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
  };
840   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
841 
  /// Information about a submodule's preprocessor state.
  ///
  /// One instance is stored per Module in the Submodules map, and a separate
  /// instance (NullSubmoduleState) is used outside of any submodule.
  struct SubmoduleState {
    /// The macros for the submodule.
    MacroMap Macros;

    /// The set of modules that are visible within the submodule.
    VisibleModuleSet VisibleModules;

    // The FIXMEs below name state that is not part of this struct today.
    // FIXME: CounterValue?
    // FIXME: PragmaPushMacroInfo?
  };
853   std::map<Module *, SubmoduleState> Submodules;
854 
  /// The preprocessor state for preprocessing outside of any submodule.
  SubmoduleState NullSubmoduleState;

  /// The current submodule state. Points at \p NullSubmoduleState if we're
  /// not in a submodule.
  SubmoduleState *CurSubmoduleState;

  /// The files that have been included.
  IncludedFilesSet IncludedFiles;

  /// The set of known macros exported from modules.
  llvm::FoldingSet<ModuleMacro> ModuleMacros;

  /// The names of potential module macros that we've not yet processed.
  llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;

  /// For each identifier, the list of module macros that are not overridden
  /// by any other module macro.
  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
      LeafModuleMacros;
875 
  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
  ///
  /// We store just their SourceLocations instead of something like
  /// MacroInfo*. The benefit of this is that when we are deserializing from
  /// PCH, we don't need to deserialize identifiers & macros just so that we
  /// can report that they are unused; we just warn using the SourceLocations
  /// of this set (that will be filled by the ASTReader).
  using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;

  /// This is a pair of an optional message and source location used for pragmas
  /// that annotate macros, like pragma clang restrict_expansion and pragma
  /// clang deprecated. This pair stores the optional message and the location
  /// of the annotation pragma for use in producing diagnostics and notes.
  using MsgLocationPair = std::pair<std::string, SourceLocation>;
892 
  /// A source location together with a message, as stored for a single macro
  /// annotation (see MacroAnnotations).
  struct MacroAnnotationInfo {
    /// The location recorded for the annotation.
    SourceLocation Location;
    /// The message recorded for the annotation.
    std::string Message;
  };
897 
898   struct MacroAnnotations {
899     llvm::Optional<MacroAnnotationInfo> DeprecationInfo;
900     llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo;
901     llvm::Optional<SourceLocation> FinalAnnotationLoc;
902 
903     static MacroAnnotations makeDeprecation(SourceLocation Loc,
904                                             std::string Msg) {
905       return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
906                               llvm::None, llvm::None};
907     }
908 
909     static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
910                                                   std::string Msg) {
911       return MacroAnnotations{
912           llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None};
913     }
914 
915     static MacroAnnotations makeFinal(SourceLocation Loc) {
916       return MacroAnnotations{llvm::None, llvm::None, Loc};
917     }
918   };
919 
  /// Warning information for macro annotations, keyed by macro identifier.
  llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;

  /// A "freelist" of MacroArgs objects that can be
  /// reused for quick allocation.
  MacroArgs *MacroArgCache = nullptr;

  /// For each IdentifierInfo used in a \#pragma push_macro directive,
  /// we keep a MacroInfo stack used to restore the previous macro value.
  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
      PragmaPushMacroInfo;
931 
  // Various statistics we track for performance analysis. Every counter
  // starts at zero; NumDirectives is exposed through getNumDirectives().
  unsigned NumDirectives = 0;
  unsigned NumDefined = 0;
  unsigned NumUndefined = 0;
  unsigned NumPragma = 0;
  unsigned NumIf = 0;
  unsigned NumElse = 0;
  unsigned NumEndif = 0;
  unsigned NumEnteredSourceFiles = 0;
  unsigned MaxIncludeStackDepth = 0;
  unsigned NumMacroExpanded = 0;
  unsigned NumFnMacroExpanded = 0;
  unsigned NumBuiltinMacroExpanded = 0;
  unsigned NumFastMacroExpanded = 0;
  unsigned NumTokenPaste = 0;
  unsigned NumFastTokenPaste = 0;
  unsigned NumSkipped = 0;
949 
  /// The predefined macros that the preprocessor should use, from the
  /// command line etc.
  std::string Predefines;

  /// The file ID for the preprocessor predefines.
  FileID PredefinesFileID;

  /// The file ID for the PCH through header.
  FileID PCHThroughHeaderFileID;

  /// Whether tokens are being skipped until a \#pragma hdrstop is seen.
  bool SkippingUntilPragmaHdrStop = false;

  /// Whether tokens are being skipped until the through header is seen.
  bool SkippingUntilPCHThroughHeader = false;
965 
  /// \{
  /// Cache of macro expanders to reduce malloc traffic.
  enum { TokenLexerCacheSize = 8 };
  // NOTE(review): unlike the neighbouring members, this one has no in-class
  // initializer; presumably the constructor sets it -- confirm.
  unsigned NumCachedTokenLexers;
  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
  /// \}

  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  SmallVector<Token, 16> MacroExpandedTokens;
  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
980 
  /// A record of the macro definitions and expansions that
  /// occurred during preprocessing.
  ///
  /// This is an optional side structure that can be enabled with
  /// \c createPreprocessingRecord() prior to preprocessing. It is null until
  /// then.
  PreprocessingRecord *Record = nullptr;

  /// Cached tokens state.
  using CachedTokensTy = SmallVector<Token, 1>;

  /// Cached tokens are stored here when we do backtracking or
  /// lookahead. They are "lexed" by the CachingLex() method.
  CachedTokensTy CachedTokens;

  /// The position of the cached token that CachingLex() should
  /// "lex" next.
  ///
  /// If it points beyond the CachedTokens vector, it means that a normal
  /// Lex() should be invoked.
  CachedTokensTy::size_type CachedLexPos = 0;

  /// Stack of backtrack positions, allowing nested backtracks.
  ///
  /// The EnableBacktrackAtThisPos() method pushes a position to
  /// indicate where CachedLexPos should be set when the Backtrack() method is
  /// invoked (at which point the last position is popped).
  std::vector<CachedTokensTy::size_type> BacktrackPositions;
1008 
  /// One node of the singly linked chain of MacroInfos.
  struct MacroInfoChain {
    /// The MacroInfo stored in this node.
    MacroInfo MI;
    /// The next node of the chain.
    MacroInfoChain *Next;
  };

  /// MacroInfos are managed as a chain for easy disposal.  This is the head
  /// of that list.
  MacroInfoChain *MIChainHead = nullptr;
1017 
  /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
  /// This is used to guard against calling this function recursively.
  ///
  /// See comments at the use-site for more context about why it is needed.
  bool SkippingExcludedConditionalBlock = false;

  /// Keeps track of skipped range mappings that were recorded while skipping
  /// excluded conditional directives. It maps the source buffer pointer at
  /// the beginning of a skipped block to the number of bytes that should be
  /// skipped.
  llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;

  /// Invoked for identifiers that report isOutOfDate() (see
  /// getLeafModuleMacros()).
  // NOTE(review): presumably refreshes \p II from the external source --
  // confirm against the out-of-line definition.
  void updateOutOfDateIdentifier(IdentifierInfo &II) const;
1031 
1032 public:
  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
               DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
               HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
               IdentifierInfoLookup *IILookup = nullptr,
               bool OwnsHeaderSearch = false,
               TranslationUnitKind TUKind = TU_Complete);

  ~Preprocessor();

  /// Initialize the preprocessor using information about the target.
  ///
  /// \param Target is owned by the caller and must remain valid for the
  /// lifetime of the preprocessor.
  /// \param AuxTarget is owned by the caller and must remain valid for
  /// the lifetime of the preprocessor.
  void Initialize(const TargetInfo &Target,
                  const TargetInfo *AuxTarget = nullptr);

  /// Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
  void InitializeForModelFile();

  /// Clean up after model file parsing.
  void FinalizeForModelFile();
1061 
  /// Retrieve the preprocessor options used to initialize this
  /// preprocessor.
  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }

  /// Get or replace the diagnostics engine used by this preprocessor.
  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }

  // Accessors for the objects this preprocessor works with. Note that
  // getTargetInfo() dereferences Target unconditionally, whereas
  // getAuxTargetInfo() returns the pointer as is.
  const LangOptions &getLangOpts() const { return LangOpts; }
  const TargetInfo &getTargetInfo() const { return *Target; }
  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
  FileManager &getFileManager() const { return FileMgr; }
  SourceManager &getSourceManager() const { return SourceMgr; }
  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }

  // Accessors for the tables and the allocator held by this preprocessor.
  IdentifierTable &getIdentifierTable() { return Identifiers; }
  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
  SelectorTable &getSelectorTable() { return Selectors; }
  Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1081 
1082   void setExternalSource(ExternalPreprocessorSource *Source) {
1083     ExternalSource = Source;
1084   }
1085 
1086   ExternalPreprocessorSource *getExternalSource() const {
1087     return ExternalSource;
1088   }
1089 
1090   /// Retrieve the module loader associated with this preprocessor.
1091   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1092 
1093   bool hadModuleLoaderFatalFailure() const {
1094     return TheModuleLoader.HadFatalFailure;
1095   }
1096 
1097   /// Retrieve the number of Directives that have been processed by the
1098   /// Preprocessor.
1099   unsigned getNumDirectives() const {
1100     return NumDirectives;
1101   }
1102 
1103   /// True if we are currently preprocessing a #if or #elif directive
1104   bool isParsingIfOrElifDirective() const {
1105     return ParsingIfOrElifDirective;
1106   }
1107 
1108   /// Control whether the preprocessor retains comments in output.
1109   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1110     this->KeepComments = KeepComments | KeepMacroComments;
1111     this->KeepMacroComments = KeepMacroComments;
1112   }
1113 
  /// Return the KeepComments flag (see SetCommentRetentionState()).
  bool getCommentRetentionState() const { return KeepComments; }

  /// Set or query the PragmasEnabled flag.
  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
  bool getPragmasEnabled() const { return PragmasEnabled; }
1118 
1119   void SetSuppressIncludeNotFoundError(bool Suppress) {
1120     SuppressIncludeNotFoundError = Suppress;
1121   }
1122 
1123   bool GetSuppressIncludeNotFoundError() {
1124     return SuppressIncludeNotFoundError;
1125   }
1126 
1127   /// Sets whether the preprocessor is responsible for producing output or if
1128   /// it is producing tokens to be consumed by Parse and Sema.
1129   void setPreprocessedOutput(bool IsPreprocessedOutput) {
1130     PreprocessedOutput = IsPreprocessedOutput;
1131   }
1132 
1133   /// Returns true if the preprocessor is responsible for generating output,
1134   /// false if it is producing tokens to be consumed by Parse and Sema.
1135   bool isPreprocessedOutput() const { return PreprocessedOutput; }
1136 
1137   /// Return true if we are lexing directly from the specified lexer.
1138   bool isCurrentLexer(const PreprocessorLexer *L) const {
1139     return CurPPLexer == L;
1140   }
1141 
  /// Return the current lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and
  /// _Pragma expansions going on at the time.
  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }

  /// Return the current file lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and
  /// _Pragma expansions going on at the time.
  PreprocessorLexer *getCurrentFileLexer() const;

  /// Return the submodule owning the file being lexed. This may not be
  /// the current module if we have changed modules since entering the file.
  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }

  /// Returns the FileID for the preprocessor predefines.
  FileID getPredefinesFileID() const { return PredefinesFileID; }
1160 
1161   /// \{
1162   /// Accessors for preprocessor callbacks.
1163   ///
1164   /// Note that this class takes ownership of any PPCallbacks object given to
1165   /// it.
1166   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1167   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1168     if (Callbacks)
1169       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1170                                                 std::move(Callbacks));
1171     Callbacks = std::move(C);
1172   }
1173   /// \}
1174 
1175   /// Get the number of tokens processed so far.
1176   unsigned getTokenCount() const { return TokenCount; }
1177 
1178   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1179   unsigned getMaxTokens() const { return MaxTokens; }
1180 
1181   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1182     MaxTokens = Value;
1183     MaxTokensOverrideLoc = Loc;
1184   };
1185 
1186   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1187 
  /// Register a function that would be called on each token in the final
  /// expanded token stream.
  /// This also reports annotation tokens produced by the parser.
  ///
  /// The new function replaces any previously registered one.
  void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
    OnToken = std::move(F);
  }

  /// Set the PreprocessToken flag.
  // NOTE(review): the effect of this flag is not visible here -- confirm
  // against its use sites before relying on it.
  void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1196 
1197   bool isMacroDefined(StringRef Id) {
1198     return isMacroDefined(&Identifiers.get(Id));
1199   }
1200   bool isMacroDefined(const IdentifierInfo *II) {
1201     return II->hasMacroDefinition() &&
1202            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1203   }
1204 
1205   /// Determine whether II is defined as a macro within the module M,
1206   /// if that is a module that we've already preprocessed. Does not check for
1207   /// macros imported into M.
1208   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1209     if (!II->hasMacroDefinition())
1210       return false;
1211     auto I = Submodules.find(M);
1212     if (I == Submodules.end())
1213       return false;
1214     auto J = I->second.Macros.find(II);
1215     if (J == I->second.Macros.end())
1216       return false;
1217     auto *MD = J->second.getLatest();
1218     return MD && MD->isDefined();
1219   }
1220 
1221   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1222     if (!II->hasMacroDefinition())
1223       return {};
1224 
1225     MacroState &S = CurSubmoduleState->Macros[II];
1226     auto *MD = S.getLatest();
1227     while (MD && isa<VisibilityMacroDirective>(MD))
1228       MD = MD->getPrevious();
1229     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1230                            S.getActiveModuleMacros(*this, II),
1231                            S.isAmbiguous(*this, II));
1232   }
1233 
1234   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1235                                           SourceLocation Loc) {
1236     if (!II->hadMacroDefinition())
1237       return {};
1238 
1239     MacroState &S = CurSubmoduleState->Macros[II];
1240     MacroDirective::DefInfo DI;
1241     if (auto *MD = S.getLatest())
1242       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1243     // FIXME: Compute the set of active module macros at the specified location.
1244     return MacroDefinition(DI.getDirective(),
1245                            S.getActiveModuleMacros(*this, II),
1246                            S.isAmbiguous(*this, II));
1247   }
1248 
1249   /// Given an identifier, return its latest non-imported MacroDirective
1250   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1251   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1252     if (!II->hasMacroDefinition())
1253       return nullptr;
1254 
1255     auto *MD = getLocalMacroDirectiveHistory(II);
1256     if (!MD || MD->getDefinition().isUndefined())
1257       return nullptr;
1258 
1259     return MD;
1260   }
1261 
1262   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1263     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1264   }
1265 
1266   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1267     if (!II->hasMacroDefinition())
1268       return nullptr;
1269     if (auto MD = getMacroDefinition(II))
1270       return MD.getMacroInfo();
1271     return nullptr;
1272   }
1273 
  /// Given an identifier, return the latest non-imported macro
  /// directive for that identifier.
  ///
  /// One can iterate over all previous macro directives, starting from the
  /// most recent one.
  MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;

  /// Add a directive to the macro directive history for this identifier.
  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1283   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1284                                              SourceLocation Loc) {
1285     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1286     appendMacroDirective(II, MD);
1287     return MD;
1288   }
1289   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1290                                              MacroInfo *MI) {
1291     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1292   }
1293 
  /// Set a MacroDirective that was loaded from a PCH file.
  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
                               MacroDirective *MD);

  /// Register an exported macro for a module and identifier.
  // NOTE(review): \p IsNew is an out-parameter; presumably it reports whether
  // a new ModuleMacro was created -- confirm against the definition.
  ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
                              ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
  /// Look up the module macro for the given module and identifier.
  ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1302 
1303   /// Get the list of leaf (non-overridden) module macros for a name.
1304   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1305     if (II->isOutOfDate())
1306       updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1307     auto I = LeafModuleMacros.find(II);
1308     if (I != LeafModuleMacros.end())
1309       return I->second;
1310     return None;
1311   }
1312 
  /// Get the list of submodules that we're currently building.
  ///
  /// The result is a view of BuildingSubmoduleStack, not a copy.
  ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
    return BuildingSubmoduleStack;
  }
1317 
  /// \{
  /// Iterators for the macro history table. Currently defined macros have
  /// IdentifierInfo::hasMacroDefinition() set and an empty
  /// MacroInfo::getUndefLoc() at the head of the list.
  using macro_iterator = MacroMap::const_iterator;

  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
  macro_iterator macro_end(bool IncludeExternalMacros = true) const;

  /// Range form of macro_begin()/macro_end(); both are called with the same
  /// \p IncludeExternalMacros value.
  llvm::iterator_range<macro_iterator>
  macros(bool IncludeExternalMacros = true) const {
    // Separate statements so that macro_begin() always runs before
    // macro_end().
    macro_iterator begin = macro_begin(IncludeExternalMacros);
    macro_iterator end = macro_end(IncludeExternalMacros);
    return llvm::make_range(begin, end);
  }

  /// \}
1335 
1336   /// Mark the file as included.
1337   /// Returns true if this is the first time the file was included.
1338   bool markIncluded(const FileEntry *File) {
1339     HeaderInfo.getFileInfo(File);
1340     return IncludedFiles.insert(File).second;
1341   }
1342 
1343   /// Return true if this header has already been included.
1344   bool alreadyIncluded(const FileEntry *File) const {
1345     return IncludedFiles.count(File);
1346   }
1347 
1348   /// Get the set of included files.
1349   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1350   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1351 
  /// Return the name of the macro defined before \p Loc that has
  /// spelling \p Tokens.  If there are multiple macros with the same spelling,
  /// return the last one defined.
  StringRef getLastMacroWithSpelling(SourceLocation Loc,
                                     ArrayRef<TokenValue> Tokens) const;

  /// Get the predefines for this preprocessor.
  /// Used by some third-party tools to inspect and add predefines (see
  /// https://github.com/llvm/llvm-project/issues/57483).
  const std::string &getPredefines() const { return Predefines; }

  /// Set the predefines for this Preprocessor.
  ///
  /// These predefines are automatically injected when parsing the main file.
  void setPredefines(std::string P) { Predefines = std::move(P); }

  /// Return information about the specified preprocessor
  /// identifier token. The result is never null: it is the address of the
  /// entry returned by the identifier table.
  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
    return &Identifiers.get(Name);
  }
1373 
  /// Add the specified pragma handler to this preprocessor.
  ///
  /// If \p Namespace is non-empty, then it is a token required to exist on the
  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
  /// Add \p Handler with an empty namespace.
  void AddPragmaHandler(PragmaHandler *Handler) {
    AddPragmaHandler(StringRef(), Handler);
  }

  /// Remove the specific pragma handler from this preprocessor.
  ///
  /// If \p Namespace is non-empty, then it should be the namespace that
  /// \p Handler was added to. It is an error to remove a handler that
  /// has not been registered.
  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
  /// Remove \p Handler from the empty namespace.
  void RemovePragmaHandler(PragmaHandler *Handler) {
    RemovePragmaHandler(StringRef(), Handler);
  }

  /// Install empty handlers for all pragmas (making them ignored).
  void IgnorePragmas();

  /// Set the empty line handler. Only the pointer is stored.
  void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }

  /// Retrieve the empty line handler pointer as last stored.
  EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1400 
  /// Add the specified comment handler to the preprocessor.
  void addCommentHandler(CommentHandler *Handler);

  /// Remove the specified comment handler.
  ///
  /// It is an error to remove a handler that has not been registered (the
  /// counterpart of addCommentHandler()).
  void removeCommentHandler(CommentHandler *Handler);
1408 
1409   /// Set the code completion handler to the given object.
1410   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1411     CodeComplete = &Handler;
1412   }
1413 
1414   /// Retrieve the current code-completion handler.
1415   CodeCompletionHandler *getCodeCompletionHandler() const {
1416     return CodeComplete;
1417   }
1418 
1419   /// Clear out the code completion handler.
1420   void clearCodeCompletionHandler() {
1421     CodeComplete = nullptr;
1422   }
1423 
  /// Hook used by the lexer to invoke the "included file" code
  /// completion point.
  void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);

  /// Hook used by the lexer to invoke the "natural language" code
  /// completion point.
  void CodeCompleteNaturalLanguage();

  /// Set the code completion token for filtering purposes.
  void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
    CodeCompletionII = Filter;
  }

  /// Set the code completion token range for detecting the replacement range
  /// later on.
  void setCodeCompletionTokenRange(const SourceLocation Start,
                                   const SourceLocation End) {
    CodeCompletionTokenRange = {Start, End};
  }
  /// Get the range stored by setCodeCompletionTokenRange().
  SourceRange getCodeCompletionTokenRange() const {
    return CodeCompletionTokenRange;
  }
1446 
1447   /// Get the code completion token for filtering purposes.
1448   StringRef getCodeCompletionFilter() {
1449     if (CodeCompletionII)
1450       return CodeCompletionII->getName();
1451     return {};
1452   }
1453 
  /// Retrieve the preprocessing record, or null if there is no
  /// preprocessing record.
  PreprocessingRecord *getPreprocessingRecord() const { return Record; }

  /// Create a new preprocessing record, which will keep track of
  /// all macro expansions, macro definitions, etc.
  void createPreprocessingRecord();

  /// Returns true if the FileEntry is the PCH through header.
  bool isPCHThroughHeader(const FileEntry *FE);

  /// True if creating a PCH with a through header.
  bool creatingPCHWithThroughHeader();

  /// True if using a PCH with a through header.
  bool usingPCHWithThroughHeader();

  /// True if creating a PCH with a \#pragma hdrstop.
  bool creatingPCHWithPragmaHdrStop();

  /// True if using a PCH with a \#pragma hdrstop.
  bool usingPCHWithPragmaHdrStop();

  /// Skip tokens until after the \#include of the through header or
  /// until after a \#pragma hdrstop.
  void SkipTokensWhileUsingPCH();

  /// Process directives while skipping until the through header or
  /// \#pragma hdrstop is found.
  void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
                                           SourceLocation HashLoc);
1485 
  /// Enter the specified FileID as the main source file,
  /// which implicitly adds the builtin defines etc.
  void EnterMainSourceFile();

  /// Inform the preprocessor callbacks that processing is complete.
  void EndSourceFile();

  /// Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// \returns true on error, in which case a diagnostic has been emitted and
  /// the file has not been entered.
  bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
                       SourceLocation Loc, bool IsFirstIncludeOfFile = true);

  /// Add a Macro to the top of the include stack and start lexing
  /// tokens from it instead of the current buffer.
  ///
  /// \param Args specifies the tokens input to a function-like macro.
  /// \param ILEnd specifies the location of the ')' for a function-like macro
  /// or the identifier for an object-like macro.
  void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
                  MacroArgs *Args);
1508 
1509 private:
  /// Add a "macro" context to the top of the include stack,
  /// which will cause the lexer to start returning the specified tokens.
  ///
  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
  /// will not be subject to further macro expansion. Otherwise, these tokens
  /// will be re-macro-expanded when/if expansion is enabled.
  ///
  /// If \p OwnsTokens is false, this method assumes that the specified stream
  /// of tokens has a permanent owner somewhere, so they do not need to be
  /// copied. If it is true, it assumes the array of tokens is allocated with
  /// \c new[] and the Preprocessor will delete[] it.
  ///
  /// If \p IsReinject is true, the resulting tokens will have the
  /// Token::IsReinjected flag set; see the flag documentation for details.
  ///
  /// This overload is private; the public overloads below fix \p OwnsTokens
  /// according to the argument type they accept.
  void EnterTokenStream(const Token *Toks, unsigned NumToks,
                        bool DisableMacroExpansion, bool OwnsTokens,
                        bool IsReinject);
1527 
1528 public:
1529   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1530                         bool DisableMacroExpansion, bool IsReinject) {
1531     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1532                      IsReinject);
1533   }
1534 
1535   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1536                         bool IsReinject) {
1537     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1538                      IsReinject);
1539   }
1540 
  /// Pop the current lexer/macro exp off the top of the lexer stack.
  ///
  /// This should only be used in situations where the current state of the
  /// top-of-stack lexer is known.
  void RemoveTopOfLexerStack();

  /// From the point that this method is called, and until
  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
  /// make the Preprocessor re-lex the same tokens.
  ///
  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
  ///
  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
  /// tokens will continue indefinitely.
  void EnableBacktrackAtThisPos();

  /// Disable the last EnableBacktrackAtThisPos call.
  void CommitBacktrackedTokens();

  /// Make Preprocessor re-lex the tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  void Backtrack();

  /// True if EnableBacktrackAtThisPos() was called and
  /// caching of tokens is on, i.e. at least one backtrack position is
  /// still recorded.
  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1572 
  /// Lex the next token for this preprocessor.
  void Lex(Token &Result);

  /// Lex a token, forming a header-name token if possible.
  bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);

  // NOTE(review): the following three are undocumented in this header; their
  // exact contracts should be confirmed against the out-of-line definitions.
  bool LexAfterModuleImport(Token &Result);
  void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);

  void makeModuleVisible(Module *M, SourceLocation Loc);

  /// Return the import location that the current submodule state's visible
  /// module set reports for \p M.
  SourceLocation getModuleImportLoc(Module *M) const {
    return CurSubmoduleState->VisibleModules.getImportLoc(M);
  }
1587 
1588   /// Lex a string literal, which may be the concatenation of multiple
1589   /// string literals and may even come from macro expansion.
1590   /// \returns true on success, false if a error diagnostic has been generated.
1591   bool LexStringLiteral(Token &Result, std::string &String,
1592                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1593     if (AllowMacroExpansion)
1594       Lex(Result);
1595     else
1596       LexUnexpandedToken(Result);
1597     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1598                                   AllowMacroExpansion);
1599   }
1600 
  /// Complete the lexing of a string literal where the first token has
  /// already been lexed (see LexStringLiteral).
  ///
  /// \param Result the already-lexed first token, as passed in by
  /// LexStringLiteral().
  bool FinishLexStringLiteral(Token &Result, std::string &String,
                              const char *DiagnosticTag,
                              bool AllowMacroExpansion);
1606 
1607   /// Lex a token.  If it's a comment, keep lexing until we get
1608   /// something not a comment.
1609   ///
1610   /// This is useful in -E -C mode where comments would foul up preprocessor
1611   /// directive handling.
1612   void LexNonComment(Token &Result) {
1613     do
1614       Lex(Result);
1615     while (Result.getKind() == tok::comment);
1616   }
1617 
1618   /// Just like Lex, but disables macro expansion of identifier tokens.
1619   void LexUnexpandedToken(Token &Result) {
1620     // Disable macro expansion.
1621     bool OldVal = DisableMacroExpansion;
1622     DisableMacroExpansion = true;
1623     // Lex the token.
1624     Lex(Result);
1625 
1626     // Reenable it.
1627     DisableMacroExpansion = OldVal;
1628   }
1629 
1630   /// Like LexNonComment, but this disables macro expansion of
1631   /// identifier tokens.
1632   void LexUnexpandedNonComment(Token &Result) {
1633     do
1634       LexUnexpandedToken(Result);
1635     while (Result.getKind() == tok::comment);
1636   }
1637 
  /// Parses a simple integer literal to get its numeric value.  Floating
  /// point literals and user defined literals are rejected.  Used primarily to
  /// handle pragmas that accept integer arguments.
  ///
  /// NOTE(review): the meaning of the bool result (success vs. failure) is not
  /// visible here; confirm against the out-of-line definition.
  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1642 
1643   /// Disables macro expansion everywhere except for preprocessor directives.
1644   void SetMacroExpansionOnlyInDirectives() {
1645     DisableMacroExpansion = true;
1646     MacroExpansionInDirectivesOverride = true;
1647   }
1648 
1649   /// Peeks ahead N tokens and returns that token without consuming any
1650   /// tokens.
1651   ///
1652   /// LookAhead(0) returns the next token that would be returned by Lex(),
1653   /// LookAhead(1) returns the token after it, etc.  This returns normal
1654   /// tokens after phase 5.  As such, it is equivalent to using
1655   /// 'Lex', not 'LexUnexpandedToken'.
1656   const Token &LookAhead(unsigned N) {
1657     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1658     if (CachedLexPos + N < CachedTokens.size())
1659       return CachedTokens[CachedLexPos+N];
1660     else
1661       return PeekAhead(N+1);
1662   }
1663 
1664   /// When backtracking is enabled and tokens are cached,
1665   /// this allows to revert a specific number of tokens.
1666   ///
1667   /// Note that the number of tokens being reverted should be up to the last
1668   /// backtrack position, not more.
1669   void RevertCachedTokens(unsigned N) {
1670     assert(isBacktrackEnabled() &&
1671            "Should only be called when tokens are cached for backtracking");
1672     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1673          && "Should revert tokens up to the last backtrack position, not more");
1674     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1675            "Corrupted backtrack positions ?");
1676     CachedLexPos -= N;
1677   }
1678 
1679   /// Enters a token in the token stream to be lexed next.
1680   ///
1681   /// If BackTrack() is called afterwards, the token will remain at the
1682   /// insertion point.
1683   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1684   /// flag set. See the flag documentation for details.
1685   void EnterToken(const Token &Tok, bool IsReinject) {
1686     if (LexLevel) {
1687       // It's not correct in general to enter caching lex mode while in the
1688       // middle of a nested lexing action.
1689       auto TokCopy = std::make_unique<Token[]>(1);
1690       TokCopy[0] = Tok;
1691       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1692     } else {
1693       EnterCachingLexMode();
1694       assert(IsReinject && "new tokens in the middle of cached stream");
1695       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1696     }
1697   }
1698 
1699   /// We notify the Preprocessor that if it is caching tokens (because
1700   /// backtrack is enabled) it should replace the most recent cached tokens
1701   /// with the given annotation token. This function has no effect if
1702   /// backtracking is not enabled.
1703   ///
1704   /// Note that the use of this function is just for optimization, so that the
1705   /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1706   /// invoked.
1707   void AnnotateCachedTokens(const Token &Tok) {
1708     assert(Tok.isAnnotation() && "Expected annotation token");
1709     if (CachedLexPos != 0 && isBacktrackEnabled())
1710       AnnotatePreviousCachedTokens(Tok);
1711   }
1712 
1713   /// Get the location of the last cached token, suitable for setting the end
1714   /// location of an annotation token.
1715   SourceLocation getLastCachedTokenLocation() const {
1716     assert(CachedLexPos != 0);
1717     return CachedTokens[CachedLexPos-1].getLastLoc();
1718   }
1719 
  /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
  /// CachedTokens.
  bool IsPreviousCachedToken(const Token &Tok) const;

  /// Replace the token at `CachedLexPos - 1` in CachedTokens with the tokens
  /// in \p NewToks.
  ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
  void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1730 
1731   /// Replace the last token with an annotation token.
1732   ///
1733   /// Like AnnotateCachedTokens(), this routine replaces an
1734   /// already-parsed (and resolved) token with an annotation
1735   /// token. However, this routine only replaces the last token with
1736   /// the annotation token; it does not affect any other cached
1737   /// tokens. This function has no effect if backtracking is not
1738   /// enabled.
1739   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1740     assert(Tok.isAnnotation() && "Expected annotation token");
1741     if (CachedLexPos != 0 && isBacktrackEnabled())
1742       CachedTokens[CachedLexPos-1] = Tok;
1743   }
1744 
  /// Enter an annotation token into the token stream.
  ///
  /// NOTE(review): presumably \p Range gives the token's source range, \p Kind
  /// its annotation kind and \p AnnotationVal its payload; confirm against the
  /// out-of-line definition.
  void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
                            void *AnnotationVal);
1748 
1749   /// Determine whether it's possible for a future call to Lex to produce an
1750   /// annotation token created by a previous call to EnterAnnotationToken.
1751   bool mightHavePendingAnnotationTokens() {
1752     return CurLexerKind != CLK_Lexer;
1753   }
1754 
1755   /// Update the current token to represent the provided
1756   /// identifier, in order to cache an action performed by typo correction.
1757   void TypoCorrectToken(const Token &Tok) {
1758     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1759     if (CachedLexPos != 0 && isBacktrackEnabled())
1760       CachedTokens[CachedLexPos-1] = Tok;
1761   }
1762 
  /// Recompute the current lexer kind (CurLexerKind) based on the CurLexer/
  /// CurTokenLexer pointers.
  void recomputeCurLexerKind();
1766 
1767   /// Returns true if incremental processing is enabled
1768   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1769 
1770   /// Enables the incremental processing
1771   void enableIncrementalProcessing(bool value = true) {
1772     IncrementalProcessing = value;
1773   }
1774 
  /// Specify the point at which code-completion will be performed.
  ///
  /// \param File the file in which code completion should occur. If
  /// this file is included multiple times, code-completion will
  /// perform completion the first time it is included. If null, this
  /// function clears out the code-completion point.
  ///
  /// \param Line the line at which code completion should occur
  /// (1-based).
  ///
  /// \param Column the column at which code completion should occur
  /// (1-based).
  ///
  /// \returns true if an error occurred, false otherwise.
  bool SetCodeCompletionPoint(const FileEntry *File,
                              unsigned Line, unsigned Column);
1791 
1792   /// Determine if we are performing code completion.
1793   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1794 
1795   /// Returns the location of the code-completion point.
1796   ///
1797   /// Returns an invalid location if code-completion is not enabled or the file
1798   /// containing the code-completion point has not been lexed yet.
1799   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1800 
1801   /// Returns the start location of the file of code-completion point.
1802   ///
1803   /// Returns an invalid location if code-completion is not enabled or the file
1804   /// containing the code-completion point has not been lexed yet.
1805   SourceLocation getCodeCompletionFileLoc() const {
1806     return CodeCompletionFileLoc;
1807   }
1808 
1809   /// Returns true if code-completion is enabled and we have hit the
1810   /// code-completion point.
1811   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1812 
1813   /// Note that we hit the code-completion point.
1814   void setCodeCompletionReached() {
1815     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1816     CodeCompletionReached = true;
1817     // Silence any diagnostics that occur after we hit the code-completion.
1818     getDiagnostics().setSuppressAllDiagnostics(true);
1819   }
1820 
1821   /// The location of the currently-active \#pragma clang
1822   /// arc_cf_code_audited begin.
1823   ///
1824   /// Returns an invalid location if there is no such pragma active.
1825   std::pair<IdentifierInfo *, SourceLocation>
1826   getPragmaARCCFCodeAuditedInfo() const {
1827     return PragmaARCCFCodeAuditedInfo;
1828   }
1829 
1830   /// Set the location of the currently-active \#pragma clang
1831   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
1832   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1833                                      SourceLocation Loc) {
1834     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1835   }
1836 
1837   /// The location of the currently-active \#pragma clang
1838   /// assume_nonnull begin.
1839   ///
1840   /// Returns an invalid location if there is no such pragma active.
1841   SourceLocation getPragmaAssumeNonNullLoc() const {
1842     return PragmaAssumeNonNullLoc;
1843   }
1844 
1845   /// Set the location of the currently-active \#pragma clang
1846   /// assume_nonnull begin.  An invalid location ends the pragma.
1847   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1848     PragmaAssumeNonNullLoc = Loc;
1849   }
1850 
1851   /// Get the location of the recorded unterminated \#pragma clang
1852   /// assume_nonnull begin in the preamble, if one exists.
1853   ///
1854   /// Returns an invalid location if the premable did not end with
1855   /// such a pragma active or if there is no recorded preamble.
1856   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
1857     return PreambleRecordedPragmaAssumeNonNullLoc;
1858   }
1859 
1860   /// Record the location of the unterminated \#pragma clang
1861   /// assume_nonnull begin in the preamble.
1862   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
1863     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
1864   }
1865 
1866   /// Set the directory in which the main file should be considered
1867   /// to have been found, if it is not a real file.
1868   void setMainFileDir(const DirectoryEntry *Dir) {
1869     MainFileDir = Dir;
1870   }
1871 
1872   /// Instruct the preprocessor to skip part of the main source file.
1873   ///
1874   /// \param Bytes The number of bytes in the preamble to skip.
1875   ///
1876   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1877   /// start of a line.
1878   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1879     SkipMainFilePreamble.first = Bytes;
1880     SkipMainFilePreamble.second = StartOfLine;
1881   }
1882 
1883   /// Forwarding function for diagnostics.  This emits a diagnostic at
1884   /// the specified Token's location, translating the token's start
1885   /// position in the current buffer into a SourcePosition object for rendering.
1886   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1887     return Diags->Report(Loc, DiagID);
1888   }
1889 
1890   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1891     return Diags->Report(Tok.getLocation(), DiagID);
1892   }
1893 
1894   /// Return the 'spelling' of the token at the given
1895   /// location; does not go up to the spelling location or down to the
1896   /// expansion location.
1897   ///
1898   /// \param buffer A buffer which will be used only if the token requires
1899   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1900   /// \param invalid If non-null, will be set \c true if an error occurs.
1901   StringRef getSpelling(SourceLocation loc,
1902                         SmallVectorImpl<char> &buffer,
1903                         bool *invalid = nullptr) const {
1904     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1905   }
1906 
1907   /// Return the 'spelling' of the Tok token.
1908   ///
1909   /// The spelling of a token is the characters used to represent the token in
1910   /// the source file after trigraph expansion and escaped-newline folding.  In
1911   /// particular, this wants to get the true, uncanonicalized, spelling of
1912   /// things like digraphs, UCNs, etc.
1913   ///
1914   /// \param Invalid If non-null, will be set \c true if an error occurs.
1915   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1916     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1917   }
1918 
1919   /// Get the spelling of a token into a preallocated buffer, instead
1920   /// of as an std::string.
1921   ///
1922   /// The caller is required to allocate enough space for the token, which is
1923   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1924   /// actual result is returned.
1925   ///
1926   /// Note that this method may do two possible things: it may either fill in
1927   /// the buffer specified with characters, or it may *change the input pointer*
1928   /// to point to a constant buffer with the data already in it (avoiding a
1929   /// copy).  The caller is not allowed to modify the returned buffer pointer
1930   /// if an internal buffer is returned.
1931   unsigned getSpelling(const Token &Tok, const char *&Buffer,
1932                        bool *Invalid = nullptr) const {
1933     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1934   }
1935 
  /// Get the spelling of a token into a SmallVector.
  ///
  /// Note that the returned StringRef may not point to the
  /// supplied buffer if a copy can be avoided.
  ///
  /// NOTE(review): \p Invalid presumably behaves as in the other getSpelling()
  /// overloads (set to true on error when non-null); confirm.
  StringRef getSpelling(const Token &Tok,
                        SmallVectorImpl<char> &Buffer,
                        bool *Invalid = nullptr) const;
1943 
1944   /// Relex the token at the specified location.
1945   /// \returns true if there was a failure, false on success.
1946   bool getRawToken(SourceLocation Loc, Token &Result,
1947                    bool IgnoreWhiteSpace = false) {
1948     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1949   }
1950 
1951   /// Given a Token \p Tok that is a numeric constant with length 1,
1952   /// return the character.
1953   char
1954   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1955                                               bool *Invalid = nullptr) const {
1956     assert(Tok.is(tok::numeric_constant) &&
1957            Tok.getLength() == 1 && "Called on unsupported token");
1958     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1959 
1960     // If the token is carrying a literal data pointer, just use it.
1961     if (const char *D = Tok.getLiteralData())
1962       return *D;
1963 
1964     // Otherwise, fall back on getCharacterData, which is slower, but always
1965     // works.
1966     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1967   }
1968 
1969   /// Retrieve the name of the immediate macro expansion.
1970   ///
1971   /// This routine starts from a source location, and finds the name of the
1972   /// macro responsible for its immediate expansion. It looks through any
1973   /// intervening macro argument expansions to compute this. It returns a
1974   /// StringRef that refers to the SourceManager-owned buffer of the source
1975   /// where that macro name is spelled. Thus, the result shouldn't out-live
1976   /// the SourceManager.
1977   StringRef getImmediateMacroName(SourceLocation Loc) {
1978     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1979   }
1980 
  /// Place the specified string into a scratch buffer and set the
  /// specified token's location and length to it.
  ///
  /// If specified, the source locations provide the start and end of the
  /// expansion point of the token.
  void CreateString(StringRef Str, Token &Tok,
                    SourceLocation ExpansionLocStart = SourceLocation(),
                    SourceLocation ExpansionLocEnd = SourceLocation());

  /// Split the first \p Length characters out of the token starting at
  /// \p TokLoc and return a location pointing to the split token. Re-lexing
  /// from the split token will return the split token rather than the original.
  SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1994 
1995   /// Computes the source location just past the end of the
1996   /// token at this source location.
1997   ///
1998   /// This routine can be used to produce a source location that
1999   /// points just past the end of the token referenced by \p Loc, and
2000   /// is generally used when a diagnostic needs to point just after a
2001   /// token where it expected something different that it received. If
2002   /// the returned source location would not be meaningful (e.g., if
2003   /// it points into a macro), this routine returns an invalid
2004   /// source location.
2005   ///
2006   /// \param Offset an offset from the end of the token, where the source
2007   /// location should refer to. The default offset (0) produces a source
2008   /// location pointing just past the end of the token; an offset of 1 produces
2009   /// a source location pointing to the last character in the token, etc.
2010   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2011     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2012   }
2013 
2014   /// Returns true if the given MacroID location points at the first
2015   /// token of the macro expansion.
2016   ///
2017   /// \param MacroBegin If non-null and function returns true, it is set to
2018   /// begin location of the macro.
2019   bool isAtStartOfMacroExpansion(SourceLocation loc,
2020                                  SourceLocation *MacroBegin = nullptr) const {
2021     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2022                                             MacroBegin);
2023   }
2024 
2025   /// Returns true if the given MacroID location points at the last
2026   /// token of the macro expansion.
2027   ///
2028   /// \param MacroEnd If non-null and function returns true, it is set to
2029   /// end location of the macro.
2030   bool isAtEndOfMacroExpansion(SourceLocation loc,
2031                                SourceLocation *MacroEnd = nullptr) const {
2032     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2033   }
2034 
  /// Print the token to stderr, used for debugging.
  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
  // NOTE(review): the members below are undocumented here; presumably they are
  // debugging dump helpers like DumpToken() — confirm against their
  // out-of-line definitions.
  void DumpLocation(SourceLocation Loc) const;
  void DumpMacro(const MacroInfo &MI) const;
  void dumpMacroInfo(const IdentifierInfo *II);
2040 
2041   /// Given a location that specifies the start of a
2042   /// token, return a new location that specifies a character within the token.
2043   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2044                                          unsigned Char) const {
2045     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2046   }
2047 
2048   /// Increment the counters for the number of token paste operations
2049   /// performed.
2050   ///
2051   /// If fast was specified, this is a 'fast paste' case we handled.
2052   void IncrementPasteCounter(bool isFast) {
2053     if (isFast)
2054       ++NumFastTokenPaste;
2055     else
2056       ++NumTokenPaste;
2057   }
2058 
  // NOTE(review): PrintStats() and getTotalMemory() are undocumented here;
  // confirm what they report against their out-of-line definitions.
  void PrintStats();

  size_t getTotalMemory() const;

  /// When the macro expander pastes together a comment (/##/) in Microsoft
  /// mode, this method handles updating the current state, returning the
  /// token on the next source line.
  void HandleMicrosoftCommentPaste(Token &Tok);
2067 
2068   //===--------------------------------------------------------------------===//
2069   // Preprocessor callback methods.  These are invoked by a lexer as various
2070   // directives and events are found.
2071 
  /// Given a tok::raw_identifier token, look up the
  /// identifier information for the token and install it into the token,
  /// updating the token kind accordingly.
  ///
  /// \param Identifier the token to update in place.
  IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2076 
2077 private:
  // NOTE(review): presumably maps a poisoned identifier to the diagnostic ID
  // registered through SetPoisonReason() — confirm against the implementation.
  llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2079 
2080 public:
  /// Specifies the reason for poisoning an identifier.
  ///
  /// If that identifier is accessed while poisoned, then this reason will be
  /// used instead of the default "poisoned" diagnostic.
  ///
  /// \param II the identifier being poisoned.
  /// \param DiagID the diagnostic to use as the reason.
  void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);

  /// Display reason for poisoned identifier.
  void HandlePoisonedIdentifier(Token & Identifier);
2089 
2090   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2091     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2092       if(II->isPoisoned()) {
2093         HandlePoisonedIdentifier(Identifier);
2094       }
2095     }
2096   }
2097 
2098 private:
  /// Identifiers used for SEH handling in Borland. These are only
  /// allowed in particular circumstances.
  // __except block
  IdentifierInfo *Ident__exception_code,
                 *Ident___exception_code,
                 *Ident_GetExceptionCode;
  // __except filter expression
  IdentifierInfo *Ident__exception_info,
                 *Ident___exception_info,
                 *Ident_GetExceptionInfo;
  // __finally
  IdentifierInfo *Ident__abnormal_termination,
                 *Ident___abnormal_termination,
                 *Ident_AbnormalTermination;

  // NOTE(review): the two helpers below are undocumented here; confirm their
  // contracts against the out-of-line definitions.
  const char *getCurLexerEndPos();
  void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2116 
2117 public:
  // Borland.
  // NOTE(review): presumably poisons (or, with Poison == false, unpoisons) the
  // SEH identifiers declared in this class — confirm against the definition.
  void PoisonSEHIdentifiers(bool Poison = true); // Borland
2119 
  /// Callback invoked when the lexer reads an identifier and has
  /// filled in the token's IdentifierInfo member.
  ///
  /// This callback potentially macro expands it or turns it into a named
  /// token (like 'for').
  ///
  /// \returns true if we actually computed a token, false if we need to
  /// lex again.
  bool HandleIdentifier(Token &Identifier);

  /// Callback invoked when the lexer hits the end of the current file.
  ///
  /// This either returns the EOF token and returns true, or
  /// pops a level off the include stack and returns false, at which point the
  /// client should call lex again.
  bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);

  /// Callback invoked when the current TokenLexer hits the end of its
  /// token stream.
  bool HandleEndOfTokenLexer(Token &Result);

  /// Callback invoked when the lexer sees a # token at the start of a
  /// line.
  ///
  /// This consumes the directive, modifies the lexer/preprocessor state, and
  /// advances the lexer(s) so that the next token read is the correct one.
  void HandleDirective(Token &Result);

  /// Ensure that the next token is a tok::eod token.
  ///
  /// If not, emit a diagnostic and consume up until the eod.
  /// If \p EnableMacros is true, then we consider macros that expand to zero
  /// tokens as being ok.
  ///
  /// \return The location of the end of the directive (the terminating
  /// newline).
  SourceLocation CheckEndOfDirective(const char *DirType,
                                     bool EnableMacros = false);

  /// Read and discard all tokens remaining on the current line until
  /// the tok::eod token is found.
  ///
  /// \returns the range of the skipped tokens.
  SourceRange DiscardUntilEndOfDirective();
2162 
2163   /// Returns true if the preprocessor has seen a use of
2164   /// __DATE__ or __TIME__ in the file so far.
2165   bool SawDateOrTime() const {
2166     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2167   }
2168   unsigned getCounterValue() const { return CounterValue; }
2169   void setCounterValue(unsigned V) { CounterValue = V; }
2170 
2171   LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2172     assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2173            "FPEvalMethod should be set either from command line or from the "
2174            "target info");
2175     return CurrentFPEvalMethod;
2176   }
2177 
2178   LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
2179     return TUFPEvalMethod;
2180   }
2181 
2182   SourceLocation getLastFPEvalPragmaLocation() const {
2183     return LastFPEvalPragmaLocation;
2184   }
2185 
2186   LangOptions::FPEvalMethodKind getLastFPEvalMethod() const {
2187     return LastFPEvalMethod;
2188   }
2189 
2190   void setLastFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2191     LastFPEvalMethod = Val;
2192   }
2193 
2194   void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2195                               LangOptions::FPEvalMethodKind Val) {
2196     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2197            "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2198     // This is the location of the '#pragma float_control" where the
2199     // execution state is modifed.
2200     LastFPEvalPragmaLocation = PragmaLoc;
2201     CurrentFPEvalMethod = Val;
2202     TUFPEvalMethod = Val;
2203   }
2204 
2205   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2206     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2207            "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2208     TUFPEvalMethod = Val;
2209   }
2210 
  /// Retrieves the module that we're currently building, if any.
  ///
  /// NOTE(review): presumably returns null when no module is being built;
  /// confirm against the out-of-line definition.
  Module *getCurrentModule();

  /// Allocate a new MacroInfo object with the provided SourceLocation.
  ///
  /// \param L the location to associate with the new MacroInfo.
  MacroInfo *AllocateMacroInfo(SourceLocation L);
2216 
  /// Turn the specified lexer token into a fully checked and spelled
  /// filename, e.g. as an operand of \#include.
  ///
  /// The caller is expected to provide a buffer that is large enough to hold
  /// the spelling of the filename, but is also expected to handle the case
  /// when this method decides to use a different buffer.
  ///
  /// \returns true if the input filename was in <>'s or false if it was
  /// in ""'s.
  bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);

  /// Given a "foo" or \<foo> reference, look up the indicated file.
  ///
  /// \param isAngled indicates whether the file reference is for system
  /// \#include's or not (i.e. using <> instead of "").
  ///
  /// \returns None on failure.
  Optional<FileEntryRef>
  LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
             ConstSearchDirIterator FromDir, const FileEntry *FromFile,
             ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
             SmallVectorImpl<char> *RelativePath,
             ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
             bool *IsFrameworkFound, bool SkipCache = false);
2239 
  /// Return true if we're in the top-level file, not in a \#include.
  bool isInPrimaryFile() const;

  /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
  /// followed by EOD.
  ///
  /// \returns true if the token is not a valid on-off-switch.
  bool LexOnOffSwitch(tok::OnOffSwitch &Result);

  // NOTE(review): CheckMacroName(), EnterSubmodule() and LeaveSubmodule() are
  // undocumented here; confirm their contracts against the out-of-line
  // definitions.
  bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
                      bool *ShadowFlag = nullptr);

  void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
  Module *LeaveSubmodule(bool ForPragma);
2252 
2253 private:
2254   friend void TokenLexer::ExpandFunctionArguments();
2255 
2256   void PushIncludeMacroStack() {
2257     assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
2258     IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
2259                                    std::move(CurLexer), CurPPLexer,
2260                                    std::move(CurTokenLexer), CurDirLookup);
2261     CurPPLexer = nullptr;
2262   }
2263 
2264   void PopIncludeMacroStack() {
2265     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2266     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2267     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2268     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2269     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2270     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
2271     IncludeMacroStack.pop_back();
2272   }
2273 
  // NOTE(review): undocumented here; confirm the contract against the
  // out-of-line definition.
  void PropagateLineStartLeadingSpaceInfo(Token &Result);

  /// Determine whether we need to create module macros for #defines in the
  /// current context.
  bool needModuleMacros() const;

  /// Update the set of active module macros and ambiguity flag for a module
  /// macro name.
  void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);

  // NOTE(review): the three allocation helpers below are undocumented here;
  // presumably each creates the named kind of macro directive — confirm
  // against the out-of-line definitions.
  DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
                                               SourceLocation Loc);
  UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
  VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
                                                             bool isPublic);
2289 
  /// Lex and validate a macro name, which occurs after a
  /// \#define or \#undef.
  ///
  /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive; defaults to
  ///                      \c MU_Other.
  /// \param ShadowFlag Points to flag that is set if macro name shadows
  ///                   a keyword; defaults to \c nullptr.
  ///
  /// This emits a diagnostic, sets the token kind to eod,
  /// and discards the rest of the macro line if the macro name is invalid.
  void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                     bool *ShadowFlag = nullptr);
2302 
  /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
  /// entire line) of the macro's tokens and adds them to MacroInfo, and while
  /// doing so performs certain validity checks including (but not limited to):
  ///   - # (stringization) is followed by a macro parameter
  ///
  /// \param MacroNameTok - Token that represents the macro name
  /// \param ImmediatelyAfterHeaderGuard - Macro follows an \#ifdef header guard
  ///
  /// \return A pointer to a MacroInfo object, or \c nullptr (after emitting a
  ///         diagnostic) if an invalid sequence of tokens is encountered.
  MacroInfo *ReadOptionalMacroParameterListAndBody(
      const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2314 
  /// The ( starting a parameter list of a macro definition has just been read.
  /// Lex the rest of the parameters and the closing ), updating \p MI with
  /// what we learn and saving in \p LastTok the last token read.
  ///
  /// \return \c true if an error occurs parsing the parameter list.
  bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2320 
  /// Provide a suggestion for a typoed directive. If there is no typo, no
  /// suggestion is made.
  ///
  /// \param Tok - Token that represents the directive
  /// \param Directive - String reference for the directive name
  void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2327 
  /// We just read a \#if or related directive and decided that the
  /// subsequent tokens are in the \#if'd out portion of the
  /// file.  Lex the rest of the file, until we see an \#endif.  If \p
  /// FoundNonSkipPortion is true, then we have already emitted code for part of
  /// this \#if directive, so \#else/\#elif blocks should never be entered. If
  /// \p FoundElse is false, then \#else directives are ok, if not, then we have
  /// already seen one so a \#else directive is a duplicate.  When this returns,
  /// the caller can lex the first valid token.
  ///
  /// \p ElseLoc defaults to a default-constructed SourceLocation.
  /// NOTE(review): presumably it is the location of the \#else that was
  /// already seen when \p FoundElse is true -- confirm against callers.
  void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                    SourceLocation IfTokenLoc,
                                    bool FoundNonSkipPortion, bool FoundElse,
                                    SourceLocation ElseLoc = SourceLocation());
2340 
  /// Information about the result for evaluating an expression for a
  /// preprocessor directive. This is the return type of
  /// EvaluateDirectiveExpression.
  ///
  /// The members have no default initializers, so whoever creates an instance
  /// is responsible for setting all three.
  struct DirectiveEvalResult {
    /// Whether the expression was evaluated as true or not.
    bool Conditional;

    /// True if the expression contained identifiers that were undefined.
    bool IncludedUndefinedIds;

    /// The source range for the expression.
    SourceRange ExprRange;
  };
2353 
  /// Evaluate an integer constant expression that may occur after a
  /// \#if or \#elif directive and return a \c DirectiveEvalResult object.
  ///
  /// If the expression is equivalent to "!defined(X)", return X in
  /// \p IfNDefMacro.
  DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2359 
  /// Process a '__has_include("path")' expression.
  ///
  /// \return \c true if successful.
  bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);

  /// Process a '__has_include_next("path")' expression.
  ///
  /// \return \c true if successful.
  bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2369 
  /// Get the directory and file from which to start \#include_next lookup.
  ///
  /// \return A pair holding the search directory iterator and the file entry
  ///         from which the lookup starts.
  std::pair<ConstSearchDirIterator, const FileEntry *>
  getIncludeNextStart(const Token &IncludeNextTok) const;
2373 
  /// Install the standard preprocessor pragmas:
  /// \#pragma GCC poison/system_header/dependency and \#pragma once.
  void RegisterBuiltinPragmas();

  /// Register builtin macros such as \c __LINE__ with the identifier table.
  void RegisterBuiltinMacros();
2380 
  /// If an identifier token is read that is to be expanded as a macro, handle
  /// it and return the next token in \p Identifier.
  ///
  /// \return \c true if we lexed a token; otherwise the caller should lex
  ///         again.
  bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2385 
  /// Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                  ArrayRef<Token> tokens);

  // NOTE(review): defined out of line; presumably the removal step of the
  // stack-like cache described above -- confirm against the implementation.
  void removeCachedMacroExpandedTokensOfLastLexer();
2395 
  /// Determine whether the next preprocessor token to be
  /// lexed is a '('.  If so, consume the token and return true, if not, this
  /// method should have no observable side-effect on the lexed tokens.
  ///
  /// \return \c true if the next token was a '(' (and has been consumed).
  bool isNextPPTokenLParen();
2400 
  /// After reading "MACRO(", this method is invoked to read all of the
  /// arguments specified for the macro invocation.
  ///
  /// \return The argument list that was read, or null on error.
  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                       SourceLocation &MacroEnd);
2405 
  /// If an identifier token is read that is to be expanded
  /// as a builtin macro, handle it and return the next token in \p Tok.
  void ExpandBuiltinMacro(Token &Tok);
2409 
  /// Read a \c _Pragma directive, slice it up, process it, then
  /// return the first token after the directive.
  /// This assumes that the \c _Pragma token has just been read into \p Tok.
  void Handle_Pragma(Token &Tok);

  /// Like Handle_Pragma, except that the pragma text is not enclosed within
  /// a string literal.
  void HandleMicrosoft__pragma(Token &Tok);
2418 
  /// Add a lexer to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// \param TheLexer The lexer to add.
  /// NOTE(review): \p Dir is presumably the search directory in which the
  /// file was found -- confirm against the implementation.
  void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2422 
  /// Set the FileID for the preprocessor predefines.
  ///
  /// May only be called while PredefinesFileID is still invalid (asserted),
  /// i.e. the ID is expected to be set once.
  void setPredefinesFileID(FileID FID) {
    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
    PredefinesFileID = FID;
  }

  /// Set the FileID for the PCH through header.
  void setPCHThroughHeaderFileID(FileID FID);
2431 
2432   /// Returns true if we are lexing from a file and not a
2433   /// pragma or a macro.
2434   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2435     return L ? !L->isPragmaLexer() : P != nullptr;
2436   }
2437 
2438   static bool IsFileLexer(const IncludeStackInfo& I) {
2439     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2440   }
2441 
2442   bool IsFileLexer() const {
2443     return IsFileLexer(CurLexer.get(), CurPPLexer);
2444   }
2445 
  //===--------------------------------------------------------------------===//
  // Token caching.

  // NOTE(review): defined out of line; presumably produces the next token in
  // \p Result while in caching-lex mode -- confirm against the implementation.
  void CachingLex(Token &Result);
2449 
2450   bool InCachingLexMode() const {
2451     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2452     // that we are past EOF, not that we are in CachingLex mode.
2453     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2454   }
2455 
  // Entry points for switching into CachingLex mode; both are defined out of
  // line.
  // NOTE(review): the "Unchecked" variant presumably skips precondition checks
  // performed by the plain one -- confirm against the implementation.
  void EnterCachingLexMode();
  void EnterCachingLexModeUnchecked();
2458 
2459   void ExitCachingLexMode() {
2460     if (InCachingLexMode())
2461       RemoveTopOfLexerStack();
2462   }
2463 
  // NOTE(review): both are defined out of line. PeekAhead returns a reference
  // to a Token, presumably the one \p N positions ahead in the token cache;
  // AnnotatePreviousCachedTokens presumably updates already cached tokens
  // based on \p Tok -- confirm against the implementation.
  const Token &PeekAhead(unsigned N);
  void AnnotatePreviousCachedTokens(const Token &Tok);
2466 
  //===--------------------------------------------------------------------===//
  /// Handle*Directive - implement the various preprocessor directives.  These
  /// should side-effect the current preprocessor object so that the next call
  /// to Lex() will return the appropriate token next.
  ///
  /// NOTE(review): all handlers are defined out of line; \p isWarning
  /// presumably selects between a warning and an error diagnostic -- confirm.
  void HandleLineDirective();
  void HandleDigitDirective(Token &Tok);
  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
  void HandleIdentSCCSDirective(Token &Tok);
  void HandleMacroPublicDirective(Token &Tok);
  void HandleMacroPrivateDirective();
2477 
2478   /// An additional notification that can be produced by a header inclusion or
2479   /// import to tell the parser what happened.
2480   struct ImportAction {
2481     enum ActionKind {
2482       None,
2483       ModuleBegin,
2484       ModuleImport,
2485       HeaderUnitImport,
2486       SkippedModuleImport,
2487       Failure,
2488     } Kind;
2489     Module *ModuleForHeader = nullptr;
2490 
2491     ImportAction(ActionKind AK, Module *Mod = nullptr)
2492         : Kind(AK), ModuleForHeader(Mod) {
2493       assert((AK == None || Mod || AK == Failure) &&
2494              "no module for module action");
2495     }
2496   };
2497 
  // Header lookup helper used when handling an inclusion or import; defined
  // out of line. The result is an Optional<FileEntryRef>, and several
  // arguments are taken by non-const reference or pointer.
  // NOTE(review): presumably the result is empty when no file was found, and
  // the reference/pointer arguments (\p CurDir, \p Filename,
  // \p IsFrameworkFound, \p IsMapped, \p LookupFilename, \p RelativePath,
  // \p SearchPath, \p SuggestedModule) are filled in as outputs -- confirm
  // against the implementation.
  Optional<FileEntryRef> LookupHeaderIncludeOrImport(
      ConstSearchDirIterator *CurDir, StringRef &Filename,
      SourceLocation FilenameLoc, CharSourceRange FilenameRange,
      const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
      bool &IsMapped, ConstSearchDirIterator LookupFrom,
      const FileEntry *LookupFromFile, StringRef &LookupFilename,
      SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
      ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2506 
  // File inclusion.
  //
  // All handlers are defined out of line. \p LookupFrom and \p LookupFromFile
  // default to nullptr in the two handlers that take them, and
  // HandleHeaderIncludeOrImport reports its outcome as an ImportAction.
  void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
                              ConstSearchDirIterator LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  ImportAction
  HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
                              Token &FilenameTok, SourceLocation EndLoc,
                              ConstSearchDirIterator LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
  void HandleMicrosoftImportDirective(Token &Tok);
2520 
2521 public:
  /// Check that the given module is available, producing a diagnostic if not.
  ///
  /// \param M The module to check.
  /// \return \c true if the check failed (because the module is not available).
  ///         \c false if the module appears to be usable.
  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                     const TargetInfo &TargetInfo,
                                     DiagnosticsEngine &Diags, Module *M);
2528 
  // Module inclusion testing.
  /// Find the module that owns the source or header file that
  /// \p Loc points to.
  ///
  /// \return The owning module, or nullptr if the location is in a file that
  ///         was included into a module or is outside any module.
  Module *getModuleForLocation(SourceLocation Loc);
2534 
  /// We want to produce a diagnostic at location \p IncLoc concerning an
  /// unreachable effect at location \p MLoc (eg, where a desired entity was
  /// declared or defined). Determine whether the right way to make \p MLoc
  /// reachable is by \#include, and if so, what header should be included.
  ///
  /// This is not necessarily fast, and might load unexpected module maps, so
  /// should only be called by code that intends to produce an error.
  ///
  /// \param IncLoc The location at which the missing effect was detected.
  /// \param MLoc A location within an unimported module at which the desired
  ///        effect occurred.
  /// \return A file that can be \#included to provide the desired effect. Null
  ///         if no such file could be determined or if a \#include is not
  ///         appropriate (eg, if a module should be imported instead).
  const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                    SourceLocation MLoc);
2551 
  /// Whether PreambleConditionalStack reports that it is recording.
  bool isRecordingPreamble() const {
    return PreambleConditionalStack.isRecording();
  }

  /// Whether PreambleConditionalStack reports that a preamble has been
  /// recorded.
  bool hasRecordedPreamble() const {
    return PreambleConditionalStack.hasRecordedPreamble();
  }
2559 
2560   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2561       return PreambleConditionalStack.getStack();
2562   }
2563 
  /// Install \p s as the stack held by PreambleConditionalStack.
  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.setStack(s);
  }
2567 
2568   void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2569                                              llvm::Optional<PreambleSkipInfo> SkipInfo) {
2570     PreambleConditionalStack.startReplaying();
2571     PreambleConditionalStack.setStack(s);
2572     PreambleConditionalStack.SkipInfo = SkipInfo;
2573   }
2574 
  /// The skip info stored in PreambleConditionalStack, returned by value.
  llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
    return PreambleConditionalStack.SkipInfo;
  }
2578 
2579 private:
  /// After processing the predefines file, initialize the conditional stack
  /// from the preamble.
  void replayPreambleConditionalStack();
2583 
  // Macro handling (defined out of line).
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  void HandleUndefDirective();

  // Conditional Inclusion (defined out of line).
  // NOTE(review): \p isIfndef presumably distinguishes \#ifndef from \#ifdef,
  // and \p Kind selects which member of the \#elif family is being handled --
  // confirm against the implementation.
  void HandleIfdefDirective(Token &Result, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &IfToken, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &EndifToken);
  void HandleElseDirective(Token &Result, const Token &HashToken);
  void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                                 tok::PPKeywordKind Kind);

  // Pragmas (defined out of line).
  void HandlePragmaDirective(PragmaIntroducer Introducer);
2600 
2601 public:
  // Publicly accessible pragma handlers; all are defined out of line.
  // NOTE(review): judging by the names, each handles the pragma it is named
  // after, and ParsePragmaPushOrPopMacro returns the identifier named by a
  // push_macro/pop_macro pragma -- confirm against the implementation.
  void HandlePragmaOnce(Token &OnceTok);
  void HandlePragmaMark(Token &MarkTok);
  void HandlePragmaPoison();
  void HandlePragmaSystemHeader(Token &SysHeaderTok);
  void HandlePragmaDependency(Token &DependencyTok);
  void HandlePragmaPushMacro(Token &Tok);
  void HandlePragmaPopMacro(Token &Tok);
  void HandlePragmaIncludeAlias(Token &Tok);
  void HandlePragmaModuleBuild(Token &Tok);
  void HandlePragmaHdrstop(Token &Tok);
  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2613 
  /// Return true and store the first token only if any CommentHandler
  /// has inserted some tokens and getCommentRetentionState() is false.
  bool HandleComment(Token &result, SourceRange Comment);
2617 
  /// A macro is used; update information about macros that need unused
  /// warnings.
  ///
  /// \param MI The macro that was used.
  void markMacroAsUsed(MacroInfo *MI);
2621 
2622   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2623                               SourceLocation AnnotationLoc) {
2624     auto Annotations = AnnotationInfos.find(II);
2625     if (Annotations == AnnotationInfos.end())
2626       AnnotationInfos.insert(std::make_pair(
2627           II,
2628           MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
2629     else
2630       Annotations->second.DeprecationInfo =
2631           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2632   }
2633 
2634   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2635                                SourceLocation AnnotationLoc) {
2636     auto Annotations = AnnotationInfos.find(II);
2637     if (Annotations == AnnotationInfos.end())
2638       AnnotationInfos.insert(
2639           std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
2640                                  AnnotationLoc, std::move(Msg))));
2641     else
2642       Annotations->second.RestrictExpansionInfo =
2643           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2644   }
2645 
2646   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
2647     auto Annotations = AnnotationInfos.find(II);
2648     if (Annotations == AnnotationInfos.end())
2649       AnnotationInfos.insert(
2650           std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
2651     else
2652       Annotations->second.FinalAnnotationLoc = AnnotationLoc;
2653   }
2654 
2655   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2656     return AnnotationInfos.find(II)->second;
2657   }
2658 
2659   void emitMacroExpansionWarnings(const Token &Identifier) const {
2660     if (Identifier.getIdentifierInfo()->isDeprecatedMacro())
2661       emitMacroDeprecationWarning(Identifier);
2662 
2663     if (Identifier.getIdentifierInfo()->isRestrictExpansion() &&
2664         !SourceMgr.isInMainFile(Identifier.getLocation()))
2665       emitRestrictExpansionWarning(Identifier);
2666   }
2667 
  // NOTE(review): defined out of line; \p Path is taken by non-const
  // reference, so it is presumably rewritten in place into the form used for
  // file-name macros, based on \p LangOpts and \p TI -- confirm against the
  // implementation.
  static void processPathForFileMacro(SmallVectorImpl<char> &Path,
                                      const LangOptions &LangOpts,
                                      const TargetInfo &TI);
2671 
2672 private:
  // Out-of-line helpers that emit the individual macro annotation warnings.
  // The first two are called from emitMacroExpansionWarnings.
  void emitMacroDeprecationWarning(const Token &Identifier) const;
  void emitRestrictExpansionWarning(const Token &Identifier) const;
  void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
2676 };
2677 
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  virtual ~CommentHandler();

  /// Called with the source range of a comment.
  ///
  /// \return \c true if the handler has pushed any tokens to be read using
  ///         e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2688 
/// Abstract base class that describes a handler that will receive
/// source ranges for empty lines encountered in the source file.
class EmptylineHandler {
public:
  virtual ~EmptylineHandler();

  /// Called with the source range of the empty lines to handle.
  virtual void HandleEmptyline(SourceRange Range) = 0;
};
2698 
/// Registry of pragma handlers added by plugins.
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2701 
2702 } // namespace clang
2703 
2704 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
2705