1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/IdentifierTable.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/Module.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/ModuleLoader.h"
28 #include "clang/Lex/ModuleMap.h"
29 #include "clang/Lex/PPCallbacks.h"
30 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
31 #include "clang/Lex/Token.h"
32 #include "clang/Lex/TokenLexer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/FoldingSet.h"
36 #include "llvm/ADT/FunctionExtras.h"
37 #include "llvm/ADT/None.h"
38 #include "llvm/ADT/Optional.h"
39 #include "llvm/ADT/PointerUnion.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/ADT/TinyPtrVector.h"
45 #include "llvm/ADT/iterator_range.h"
46 #include "llvm/Support/Allocator.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Registry.h"
49 #include <cassert>
50 #include <cstddef>
51 #include <cstdint>
52 #include <map>
53 #include <memory>
54 #include <string>
55 #include <utility>
56 #include <vector>
57 
58 namespace llvm {
59 
60 template<unsigned InternalLen> class SmallString;
61 
62 } // namespace llvm
63 
64 namespace clang {
65 
66 class CodeCompletionHandler;
67 class CommentHandler;
68 class DirectoryEntry;
69 class DirectoryLookup;
70 class ExternalPreprocessorSource;
71 class FileEntry;
72 class FileManager;
73 class HeaderSearch;
74 class MacroArgs;
75 class PragmaHandler;
76 class PragmaNamespace;
77 class PreprocessingRecord;
78 class PreprocessorLexer;
79 class PreprocessorOptions;
80 class ScratchBuffer;
81 class TargetInfo;
82 
83 namespace Builtin {
84 class Context;
85 }
86 
87 /// Stores token information for comparing actual tokens with
88 /// predefined values.  Only handles simple tokens and identifiers.
89 class TokenValue {
90   tok::TokenKind Kind;
91   IdentifierInfo *II;
92 
93 public:
TokenValue(tok::TokenKind Kind)94   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
95     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
96     assert(Kind != tok::identifier &&
97            "Identifiers should be created by TokenValue(IdentifierInfo *)");
98     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
99     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
100   }
101 
TokenValue(IdentifierInfo * II)102   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
103 
104   bool operator==(const Token &Tok) const {
105     return Tok.getKind() == Kind &&
106         (!II || II == Tok.getIdentifierInfo());
107   }
108 };
109 
/// The context in which a macro name is used.
enum MacroUse {
  /// Any use other than in \#define or \#undef.
  MU_Other = 0,

  /// The macro name specified in a \#define.
  MU_Define = 1,

  /// The macro name specified in an \#undef.
  MU_Undef = 2
};
121 
122 /// Engages in a tight little dance with the lexer to efficiently
123 /// preprocess tokens.
124 ///
125 /// Lexers know only about tokens within a single source file, and don't
126 /// know anything about preprocessor-level issues like the \#include stack,
127 /// token expansion, etc.
128 class Preprocessor {
129   friend class VAOptDefinitionContext;
130   friend class VariadicMacroScopeGuard;
131 
132   llvm::unique_function<void(const clang::Token &)> OnToken;
133   std::shared_ptr<PreprocessorOptions> PPOpts;
134   DiagnosticsEngine        *Diags;
135   LangOptions       &LangOpts;
136   const TargetInfo *Target = nullptr;
137   const TargetInfo *AuxTarget = nullptr;
138   FileManager       &FileMgr;
139   SourceManager     &SourceMgr;
140   std::unique_ptr<ScratchBuffer> ScratchBuf;
141   HeaderSearch      &HeaderInfo;
142   ModuleLoader      &TheModuleLoader;
143 
144   /// External source of macros.
145   ExternalPreprocessorSource *ExternalSource;
146 
147   /// A BumpPtrAllocator object used to quickly allocate and release
148   /// objects internal to the Preprocessor.
149   llvm::BumpPtrAllocator BP;
150 
151   /// Identifiers for builtin macros and other builtins.
152   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
153   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
154   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
155   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
156   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
157   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
158   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
159   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
160   IdentifierInfo *Ident__identifier;               // __identifier
161   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
162   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
163   IdentifierInfo *Ident__has_feature;              // __has_feature
164   IdentifierInfo *Ident__has_extension;            // __has_extension
165   IdentifierInfo *Ident__has_builtin;              // __has_builtin
166   IdentifierInfo *Ident__has_attribute;            // __has_attribute
167   IdentifierInfo *Ident__has_include;              // __has_include
168   IdentifierInfo *Ident__has_include_next;         // __has_include_next
169   IdentifierInfo *Ident__has_warning;              // __has_warning
170   IdentifierInfo *Ident__is_identifier;            // __is_identifier
171   IdentifierInfo *Ident__building_module;          // __building_module
172   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
173   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
174   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
175   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
176   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
177   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
178   IdentifierInfo *Ident__is_target_os;             // __is_target_os
179   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
180 
181   // Weak, only valid (and set) while InMacroArgs is true.
182   Token* ArgMacro;
183 
184   SourceLocation DATELoc, TIMELoc;
185 
186   // Next __COUNTER__ value, starts at 0.
187   unsigned CounterValue = 0;
188 
189   enum {
190     /// Maximum depth of \#includes.
191     MaxAllowedIncludeStackDepth = 200
192   };
193 
194   // State that is set before the preprocessor begins.
195   bool KeepComments : 1;
196   bool KeepMacroComments : 1;
197   bool SuppressIncludeNotFoundError : 1;
198 
199   // State that changes while the preprocessor runs:
200   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
201 
202   /// Whether the preprocessor owns the header search object.
203   bool OwnsHeaderSearch : 1;
204 
205   /// True if macro expansion is disabled.
206   bool DisableMacroExpansion : 1;
207 
208   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
209   /// when parsing preprocessor directives.
210   bool MacroExpansionInDirectivesOverride : 1;
211 
212   class ResetMacroExpansionHelper;
213 
214   /// Whether we have already loaded macros from the external source.
215   mutable bool ReadMacrosFromExternalSource : 1;
216 
217   /// True if pragmas are enabled.
218   bool PragmasEnabled : 1;
219 
220   /// True if the current build action is a preprocessing action.
221   bool PreprocessedOutput : 1;
222 
  /// True if we are currently preprocessing a \#if or \#elif directive.
224   bool ParsingIfOrElifDirective;
225 
226   /// True if we are pre-expanding macro arguments.
227   bool InMacroArgPreExpansion;
228 
229   /// Mapping/lookup information for all identifiers in
230   /// the program, including program keywords.
231   mutable IdentifierTable Identifiers;
232 
233   /// This table contains all the selectors in the program.
234   ///
235   /// Unlike IdentifierTable above, this table *isn't* populated by the
236   /// preprocessor. It is declared/expanded here because its role/lifetime is
  /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put it here.
239   ///
240   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
241   /// the lifetime of the preprocessor.
242   SelectorTable Selectors;
243 
244   /// Information about builtins.
245   std::unique_ptr<Builtin::Context> BuiltinInfo;
246 
247   /// Tracks all of the pragmas that the client registered
248   /// with this preprocessor.
249   std::unique_ptr<PragmaNamespace> PragmaHandlers;
250 
  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
253   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
254 
255   /// Tracks all of the comment handlers that the client registered
256   /// with this preprocessor.
257   std::vector<CommentHandler *> CommentHandlers;
258 
  /// True if we want to ignore the EOF token and continue later on (thus
  /// avoiding tearing down the Lexer etc.).
261   bool IncrementalProcessing = false;
262 
263   /// The kind of translation unit we are processing.
264   TranslationUnitKind TUKind;
265 
266   /// The code-completion handler.
267   CodeCompletionHandler *CodeComplete = nullptr;
268 
269   /// The file that we're performing code-completion for, if any.
270   const FileEntry *CodeCompletionFile = nullptr;
271 
272   /// The offset in file for the code-completion point.
273   unsigned CodeCompletionOffset = 0;
274 
275   /// The location for the code-completion point. This gets instantiated
276   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
277   SourceLocation CodeCompletionLoc;
278 
279   /// The start location for the file of the code-completion point.
280   ///
281   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
282   /// for preprocessing.
283   SourceLocation CodeCompletionFileLoc;
284 
285   /// The source location of the \c import contextual keyword we just
286   /// lexed, if any.
287   SourceLocation ModuleImportLoc;
288 
289   /// The module import path that we're currently processing.
290   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
291 
292   /// Whether the last token we lexed was an '@'.
293   bool LastTokenWasAt = false;
294 
295   /// A position within a C++20 import-seq.
296   class ImportSeq {
297   public:
298     enum State : int {
299       // Positive values represent a number of unclosed brackets.
300       AtTopLevel = 0,
301       AfterTopLevelTokenSeq = -1,
302       AfterExport = -2,
303       AfterImportSeq = -3,
304     };
305 
ImportSeq(State S)306     ImportSeq(State S) : S(S) {}
307 
308     /// Saw any kind of open bracket.
handleOpenBracket()309     void handleOpenBracket() {
310       S = static_cast<State>(std::max<int>(S, 0) + 1);
311     }
312     /// Saw any kind of close bracket other than '}'.
handleCloseBracket()313     void handleCloseBracket() {
314       S = static_cast<State>(std::max<int>(S, 1) - 1);
315     }
316     /// Saw a close brace.
handleCloseBrace()317     void handleCloseBrace() {
318       handleCloseBracket();
319       if (S == AtTopLevel && !AfterHeaderName)
320         S = AfterTopLevelTokenSeq;
321     }
322     /// Saw a semicolon.
handleSemi()323     void handleSemi() {
324       if (atTopLevel()) {
325         S = AfterTopLevelTokenSeq;
326         AfterHeaderName = false;
327       }
328     }
329 
330     /// Saw an 'export' identifier.
handleExport()331     void handleExport() {
332       if (S == AfterTopLevelTokenSeq)
333         S = AfterExport;
334       else if (S <= 0)
335         S = AtTopLevel;
336     }
337     /// Saw an 'import' identifier.
handleImport()338     void handleImport() {
339       if (S == AfterTopLevelTokenSeq || S == AfterExport)
340         S = AfterImportSeq;
341       else if (S <= 0)
342         S = AtTopLevel;
343     }
344 
345     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
346     /// until we reach a top-level semicolon.
handleHeaderName()347     void handleHeaderName() {
348       if (S == AfterImportSeq)
349         AfterHeaderName = true;
350       handleMisc();
351     }
352 
353     /// Saw any other token.
handleMisc()354     void handleMisc() {
355       if (S <= 0)
356         S = AtTopLevel;
357     }
358 
atTopLevel()359     bool atTopLevel() { return S <= 0; }
afterImportSeq()360     bool afterImportSeq() { return S == AfterImportSeq; }
361 
362   private:
363     State S;
364     /// Whether we're in the pp-import-suffix following the header-name in a
365     /// pp-import. If so, a close-brace is not sufficient to end the
366     /// top-level-token-seq of an import-seq.
367     bool AfterHeaderName = false;
368   };
369 
370   /// Our current position within a C++20 import-seq.
371   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
372 
373   /// Whether the module import expects an identifier next. Otherwise,
374   /// it expects a '.' or ';'.
375   bool ModuleImportExpectsIdentifier = false;
376 
377   /// The identifier and source location of the currently-active
378   /// \#pragma clang arc_cf_code_audited begin.
379   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
380 
381   /// The source location of the currently-active
382   /// \#pragma clang assume_nonnull begin.
383   SourceLocation PragmaAssumeNonNullLoc;
384 
385   /// True if we hit the code-completion point.
386   bool CodeCompletionReached = false;
387 
388   /// The code completion token containing the information
389   /// on the stem that is to be code completed.
390   IdentifierInfo *CodeCompletionII = nullptr;
391 
392   /// Range for the code completion token.
393   SourceRange CodeCompletionTokenRange;
394 
395   /// The directory that the main file should be considered to occupy,
396   /// if it does not correspond to a real file (as happens when building a
397   /// module).
398   const DirectoryEntry *MainFileDir = nullptr;
399 
400   /// The number of bytes that we will initially skip when entering the
401   /// main file, along with a flag that indicates whether skipping this number
402   /// of bytes will place the lexer at the start of a line.
403   ///
404   /// This is used when loading a precompiled preamble.
405   std::pair<int, bool> SkipMainFilePreamble;
406 
  /// Whether we hit an error due to reaching the maximum allowed include
  /// depth. This lets us avoid reporting the same error over and over again.
409   bool HasReachedMaxIncludeDepth = false;
410 
411   /// The number of currently-active calls to Lex.
412   ///
413   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
414   /// require asking for multiple additional tokens. This counter makes it
415   /// possible for Lex to detect whether it's producing a token for the end
416   /// of phase 4 of translation or for some other situation.
417   unsigned LexLevel = 0;
418 
419   /// The number of (LexLevel 0) preprocessor tokens.
420   unsigned TokenCount = 0;
421 
422   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
423   /// warning, or zero for unlimited.
424   unsigned MaxTokens = 0;
425   SourceLocation MaxTokensOverrideLoc;
426 
427 public:
428   struct PreambleSkipInfo {
429     SourceLocation HashTokenLoc;
430     SourceLocation IfTokenLoc;
431     bool FoundNonSkipPortion;
432     bool FoundElse;
433     SourceLocation ElseLoc;
434 
PreambleSkipInfoPreambleSkipInfo435     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
436                      bool FoundNonSkipPortion, bool FoundElse,
437                      SourceLocation ElseLoc)
438         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
439           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
440           ElseLoc(ElseLoc) {}
441   };
442 
443 private:
444   friend class ASTReader;
445   friend class MacroArgs;
446 
447   class PreambleConditionalStackStore {
448     enum State {
449       Off = 0,
450       Recording = 1,
451       Replaying = 2,
452     };
453 
454   public:
455     PreambleConditionalStackStore() = default;
456 
startRecording()457     void startRecording() { ConditionalStackState = Recording; }
startReplaying()458     void startReplaying() { ConditionalStackState = Replaying; }
isRecording()459     bool isRecording() const { return ConditionalStackState == Recording; }
isReplaying()460     bool isReplaying() const { return ConditionalStackState == Replaying; }
461 
getStack()462     ArrayRef<PPConditionalInfo> getStack() const {
463       return ConditionalStack;
464     }
465 
doneReplaying()466     void doneReplaying() {
467       ConditionalStack.clear();
468       ConditionalStackState = Off;
469     }
470 
setStack(ArrayRef<PPConditionalInfo> s)471     void setStack(ArrayRef<PPConditionalInfo> s) {
472       if (!isRecording() && !isReplaying())
473         return;
474       ConditionalStack.clear();
475       ConditionalStack.append(s.begin(), s.end());
476     }
477 
hasRecordedPreamble()478     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
479 
reachedEOFWhileSkipping()480     bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
481 
clearSkipInfo()482     void clearSkipInfo() { SkipInfo.reset(); }
483 
484     llvm::Optional<PreambleSkipInfo> SkipInfo;
485 
486   private:
487     SmallVector<PPConditionalInfo, 4> ConditionalStack;
488     State ConditionalStackState = Off;
489   } PreambleConditionalStack;
490 
491   /// The current top of the stack that we're lexing from if
492   /// not expanding a macro and we are lexing directly from source code.
493   ///
494   /// Only one of CurLexer, or CurTokenLexer will be non-null.
495   std::unique_ptr<Lexer> CurLexer;
496 
  /// The current top of the stack that we're lexing from
  /// if not expanding a macro.
499   ///
500   /// This is an alias for CurLexer.
501   PreprocessorLexer *CurPPLexer = nullptr;
502 
503   /// Used to find the current FileEntry, if CurLexer is non-null
504   /// and if applicable.
505   ///
506   /// This allows us to implement \#include_next and find directory-specific
507   /// properties.
508   const DirectoryLookup *CurDirLookup = nullptr;
509 
510   /// The current macro we are expanding, if we are expanding a macro.
511   ///
512   /// One of CurLexer and CurTokenLexer must be null.
513   std::unique_ptr<TokenLexer> CurTokenLexer;
514 
515   /// The kind of lexer we're currently working with.
516   enum CurLexerKind {
517     CLK_Lexer,
518     CLK_TokenLexer,
519     CLK_CachingLexer,
520     CLK_LexAfterModuleImport
521   } CurLexerKind = CLK_Lexer;
522 
523   /// If the current lexer is for a submodule that is being built, this
524   /// is that submodule.
525   Module *CurLexerSubmodule = nullptr;
526 
527   /// Keeps track of the stack of files currently
528   /// \#included, and macros currently being expanded from, not counting
529   /// CurLexer/CurTokenLexer.
530   struct IncludeStackInfo {
531     enum CurLexerKind           CurLexerKind;
532     Module                     *TheSubmodule;
533     std::unique_ptr<Lexer>      TheLexer;
534     PreprocessorLexer          *ThePPLexer;
535     std::unique_ptr<TokenLexer> TheTokenLexer;
536     const DirectoryLookup      *TheDirLookup;
537 
538     // The following constructors are completely useless copies of the default
539     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo540     IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
541                      std::unique_ptr<Lexer> &&TheLexer,
542                      PreprocessorLexer *ThePPLexer,
543                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
544                      const DirectoryLookup *TheDirLookup)
545         : CurLexerKind(std::move(CurLexerKind)),
546           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
547           ThePPLexer(std::move(ThePPLexer)),
548           TheTokenLexer(std::move(TheTokenLexer)),
549           TheDirLookup(std::move(TheDirLookup)) {}
550   };
551   std::vector<IncludeStackInfo> IncludeMacroStack;
552 
553   /// Actions invoked when some preprocessor activity is
554   /// encountered (e.g. a file is \#included, etc).
555   std::unique_ptr<PPCallbacks> Callbacks;
556 
557   struct MacroExpandsInfo {
558     Token Tok;
559     MacroDefinition MD;
560     SourceRange Range;
561 
MacroExpandsInfoMacroExpandsInfo562     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
563         : Tok(Tok), MD(MD), Range(Range) {}
564   };
565   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
566 
567   /// Information about a name that has been used to define a module macro.
568   struct ModuleMacroInfo {
569     /// The most recent macro directive for this identifier.
570     MacroDirective *MD;
571 
572     /// The active module macros for this identifier.
573     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
574 
575     /// The generation number at which we last updated ActiveModuleMacros.
576     /// \see Preprocessor::VisibleModules.
577     unsigned ActiveModuleMacrosGeneration = 0;
578 
579     /// Whether this macro name is ambiguous.
580     bool IsAmbiguous = false;
581 
582     /// The module macros that are overridden by this macro.
583     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
584 
ModuleMacroInfoModuleMacroInfo585     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
586   };
587 
588   /// The state of a macro for an identifier.
589   class MacroState {
590     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
591 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)592     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
593                                    const IdentifierInfo *II) const {
594       if (II->isOutOfDate())
595         PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
596       // FIXME: Find a spare bit on IdentifierInfo and store a
597       //        HasModuleMacros flag.
598       if (!II->hasMacroDefinition() ||
599           (!PP.getLangOpts().Modules &&
600            !PP.getLangOpts().ModulesLocalVisibility) ||
601           !PP.CurSubmoduleState->VisibleModules.getGeneration())
602         return nullptr;
603 
604       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
605       if (!Info) {
606         Info = new (PP.getPreprocessorAllocator())
607             ModuleMacroInfo(State.get<MacroDirective *>());
608         State = Info;
609       }
610 
611       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
612           Info->ActiveModuleMacrosGeneration)
613         PP.updateModuleMacroInfo(II, *Info);
614       return Info;
615     }
616 
617   public:
MacroState()618     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)619     MacroState(MacroDirective *MD) : State(MD) {}
620 
MacroState(MacroState && O)621     MacroState(MacroState &&O) noexcept : State(O.State) {
622       O.State = (MacroDirective *)nullptr;
623     }
624 
625     MacroState &operator=(MacroState &&O) noexcept {
626       auto S = O.State;
627       O.State = (MacroDirective *)nullptr;
628       State = S;
629       return *this;
630     }
631 
~MacroState()632     ~MacroState() {
633       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
634         Info->~ModuleMacroInfo();
635     }
636 
getLatest()637     MacroDirective *getLatest() const {
638       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
639         return Info->MD;
640       return State.get<MacroDirective*>();
641     }
642 
setLatest(MacroDirective * MD)643     void setLatest(MacroDirective *MD) {
644       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
645         Info->MD = MD;
646       else
647         State = MD;
648     }
649 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)650     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
651       auto *Info = getModuleInfo(PP, II);
652       return Info ? Info->IsAmbiguous : false;
653     }
654 
655     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)656     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
657       if (auto *Info = getModuleInfo(PP, II))
658         return Info->ActiveModuleMacros;
659       return None;
660     }
661 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)662     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
663                                                SourceManager &SourceMgr) const {
664       // FIXME: Incorporate module macros into the result of this.
665       if (auto *Latest = getLatest())
666         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
667       return {};
668     }
669 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)670     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
671       if (auto *Info = getModuleInfo(PP, II)) {
672         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
673                                       Info->ActiveModuleMacros.begin(),
674                                       Info->ActiveModuleMacros.end());
675         Info->ActiveModuleMacros.clear();
676         Info->IsAmbiguous = false;
677       }
678     }
679 
getOverriddenMacros()680     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
681       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
682         return Info->OverriddenMacros;
683       return None;
684     }
685 
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)686     void setOverriddenMacros(Preprocessor &PP,
687                              ArrayRef<ModuleMacro *> Overrides) {
688       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
689       if (!Info) {
690         if (Overrides.empty())
691           return;
692         Info = new (PP.getPreprocessorAllocator())
693             ModuleMacroInfo(State.get<MacroDirective *>());
694         State = Info;
695       }
696       Info->OverriddenMacros.clear();
697       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
698                                     Overrides.begin(), Overrides.end());
699       Info->ActiveModuleMacrosGeneration = 0;
700     }
701   };
702 
703   /// For each IdentifierInfo that was associated with a macro, we
704   /// keep a mapping to the history of all macro definitions and #undefs in
705   /// the reverse order (the latest one is in the head of the list).
706   ///
707   /// This mapping lives within the \p CurSubmoduleState.
708   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
709 
710   struct SubmoduleState;
711 
712   /// Information about a submodule that we're currently building.
713   struct BuildingSubmoduleInfo {
714     /// The module that we are building.
715     Module *M;
716 
717     /// The location at which the module was included.
718     SourceLocation ImportLoc;
719 
720     /// Whether we entered this submodule via a pragma.
721     bool IsPragma;
722 
723     /// The previous SubmoduleState.
724     SubmoduleState *OuterSubmoduleState;
725 
726     /// The number of pending module macro names when we started building this.
727     unsigned OuterPendingModuleMacroNames;
728 
BuildingSubmoduleInfoBuildingSubmoduleInfo729     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
730                           SubmoduleState *OuterSubmoduleState,
731                           unsigned OuterPendingModuleMacroNames)
732         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
733           OuterSubmoduleState(OuterSubmoduleState),
734           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
735   };
736   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
737 
  /// Information about a submodule's preprocessor state.
  ///
  /// One instance exists per module in \c Submodules, plus
  /// \c NullSubmoduleState for preprocessing outside of any submodule.
  struct SubmoduleState {
    /// The macros for the submodule, keyed by identifier (see \c MacroMap).
    MacroMap Macros;

    /// The set of modules that are visible within the submodule.
    VisibleModuleSet VisibleModules;

    // FIXME: CounterValue?
    // FIXME: PragmaPushMacroInfo?
  };
749   std::map<Module *, SubmoduleState> Submodules;
750 
751   /// The preprocessor state for preprocessing outside of any submodule.
752   SubmoduleState NullSubmoduleState;
753 
754   /// The current submodule state. Will be \p NullSubmoduleState if we're not
755   /// in a submodule.
756   SubmoduleState *CurSubmoduleState;
757 
758   /// The set of known macros exported from modules.
759   llvm::FoldingSet<ModuleMacro> ModuleMacros;
760 
761   /// The names of potential module macros that we've not yet processed.
762   llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
763 
764   /// The list of module macros, for each identifier, that are not overridden by
765   /// any other module macro.
766   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
767       LeafModuleMacros;
768 
  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
771   ///
772   /// We store just their SourceLocations instead of
773   /// something like MacroInfo*. The benefit of this is that when we are
774   /// deserializing from PCH, we don't need to deserialize identifier & macros
775   /// just so that we can report that they are unused, we just warn using
776   /// the SourceLocations of this set (that will be filled by the ASTReader).
777   /// We are using SmallPtrSet instead of a vector for faster removal.
778   using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
779   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
780 
781   /// A "freelist" of MacroArg objects that can be
782   /// reused for quick allocation.
783   MacroArgs *MacroArgCache = nullptr;
784 
785   /// For each IdentifierInfo used in a \#pragma push_macro directive,
786   /// we keep a MacroInfo stack used to restore the previous macro value.
787   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
788       PragmaPushMacroInfo;
789 
790   // Various statistics we track for performance analysis.
791   unsigned NumDirectives = 0;
792   unsigned NumDefined = 0;
793   unsigned NumUndefined = 0;
794   unsigned NumPragma = 0;
795   unsigned NumIf = 0;
796   unsigned NumElse = 0;
797   unsigned NumEndif = 0;
798   unsigned NumEnteredSourceFiles = 0;
799   unsigned MaxIncludeStackDepth = 0;
800   unsigned NumMacroExpanded = 0;
801   unsigned NumFnMacroExpanded = 0;
802   unsigned NumBuiltinMacroExpanded = 0;
803   unsigned NumFastMacroExpanded = 0;
804   unsigned NumTokenPaste = 0;
805   unsigned NumFastTokenPaste = 0;
806   unsigned NumSkipped = 0;
807 
808   /// The predefined macros that preprocessor should use from the
809   /// command line etc.
810   std::string Predefines;
811 
812   /// The file ID for the preprocessor predefines.
813   FileID PredefinesFileID;
814 
815   /// The file ID for the PCH through header.
816   FileID PCHThroughHeaderFileID;
817 
818   /// Whether tokens are being skipped until a #pragma hdrstop is seen.
819   bool SkippingUntilPragmaHdrStop = false;
820 
821   /// Whether tokens are being skipped until the through header is seen.
822   bool SkippingUntilPCHThroughHeader = false;
823 
824   /// \{
825   /// Cache of macro expanders to reduce malloc traffic.
826   enum { TokenLexerCacheSize = 8 };
827   unsigned NumCachedTokenLexers;
828   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
829   /// \}
830 
  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
836   SmallVector<Token, 16> MacroExpandedTokens;
837   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
838 
839   /// A record of the macro definitions and expansions that
840   /// occurred during preprocessing.
841   ///
842   /// This is an optional side structure that can be enabled with
843   /// \c createPreprocessingRecord() prior to preprocessing.
844   PreprocessingRecord *Record = nullptr;
845 
846   /// Cached tokens state.
847   using CachedTokensTy = SmallVector<Token, 1>;
848 
849   /// Cached tokens are stored here when we do backtracking or
850   /// lookahead. They are "lexed" by the CachingLex() method.
851   CachedTokensTy CachedTokens;
852 
853   /// The position of the cached token that CachingLex() should
854   /// "lex" next.
855   ///
856   /// If it points beyond the CachedTokens vector, it means that a normal
857   /// Lex() should be invoked.
858   CachedTokensTy::size_type CachedLexPos = 0;
859 
860   /// Stack of backtrack positions, allowing nested backtracks.
861   ///
862   /// The EnableBacktrackAtThisPos() method pushes a position to
863   /// indicate where CachedLexPos should be set when the BackTrack() method is
864   /// invoked (at which point the last position is popped).
865   std::vector<CachedTokensTy::size_type> BacktrackPositions;
866 
  /// One node of a singly linked chain of MacroInfo objects: the MacroInfo
  /// itself, stored by value, followed by a pointer to the next node.
  struct MacroInfoChain {
    MacroInfo MI;
    MacroInfoChain *Next;
  };
871 
872   /// MacroInfos are managed as a chain for easy disposal.  This is the head
873   /// of that list.
874   MacroInfoChain *MIChainHead = nullptr;
875 
876   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
877 
878 public:
879   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
880                DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
881                HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
882                IdentifierInfoLookup *IILookup = nullptr,
883                bool OwnsHeaderSearch = false,
884                TranslationUnitKind TUKind = TU_Complete);
885 
886   ~Preprocessor();
887 
888   /// Initialize the preprocessor using information about the target.
889   ///
890   /// \param Target is owned by the caller and must remain valid for the
891   /// lifetime of the preprocessor.
892   /// \param AuxTarget is owned by the caller and must remain valid for
893   /// the lifetime of the preprocessor.
894   void Initialize(const TargetInfo &Target,
895                   const TargetInfo *AuxTarget = nullptr);
896 
  /// Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
903   void InitializeForModelFile();
904 
905   /// Cleanup after model file parsing
906   void FinalizeForModelFile();
907 
  /// Retrieve the preprocessor options used to initialize this
  /// preprocessor.
  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }

  /// Retrieve the diagnostics engine currently used by this preprocessor.
  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
  /// Redirect this preprocessor to the diagnostics engine \p D. Only the
  /// address of \p D is stored.
  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }

  /// Retrieve the language options (read-only).
  const LangOptions &getLangOpts() const { return LangOpts; }
  /// Retrieve the primary target. The stored target pointer is dereferenced
  /// unconditionally, so presumably Initialize() must have run first -- TODO
  /// confirm.
  const TargetInfo &getTargetInfo() const { return *Target; }
  /// Retrieve the auxiliary target as a pointer; Initialize() defaults it to
  /// nullptr, so callers should expect null.
  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
  /// Retrieve the file manager.
  FileManager &getFileManager() const { return FileMgr; }
  /// Retrieve the source manager.
  SourceManager &getSourceManager() const { return SourceMgr; }
  /// Retrieve the header search information.
  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }

  /// Retrieve the identifier table (mutable and read-only overloads).
  IdentifierTable &getIdentifierTable() { return Identifiers; }
  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
  /// Retrieve the selector table.
  SelectorTable &getSelectorTable() { return Selectors; }
  /// Retrieve the builtin context. The stored pointer is dereferenced
  /// unconditionally.
  Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
  /// Retrieve the bump-pointer allocator owned by this preprocessor.
  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
927 
setExternalSource(ExternalPreprocessorSource * Source)928   void setExternalSource(ExternalPreprocessorSource *Source) {
929     ExternalSource = Source;
930   }
931 
getExternalSource()932   ExternalPreprocessorSource *getExternalSource() const {
933     return ExternalSource;
934   }
935 
936   /// Retrieve the module loader associated with this preprocessor.
getModuleLoader()937   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
938 
hadModuleLoaderFatalFailure()939   bool hadModuleLoaderFatalFailure() const {
940     return TheModuleLoader.HadFatalFailure;
941   }
942 
943   /// Retrieve the number of Directives that have been processed by the
944   /// Preprocessor.
getNumDirectives()945   unsigned getNumDirectives() const {
946     return NumDirectives;
947   }
948 
949   /// True if we are currently preprocessing a #if or #elif directive
isParsingIfOrElifDirective()950   bool isParsingIfOrElifDirective() const {
951     return ParsingIfOrElifDirective;
952   }
953 
954   /// Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)955   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
956     this->KeepComments = KeepComments | KeepMacroComments;
957     this->KeepMacroComments = KeepMacroComments;
958   }
959 
getCommentRetentionState()960   bool getCommentRetentionState() const { return KeepComments; }
961 
setPragmasEnabled(bool Enabled)962   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
getPragmasEnabled()963   bool getPragmasEnabled() const { return PragmasEnabled; }
964 
SetSuppressIncludeNotFoundError(bool Suppress)965   void SetSuppressIncludeNotFoundError(bool Suppress) {
966     SuppressIncludeNotFoundError = Suppress;
967   }
968 
GetSuppressIncludeNotFoundError()969   bool GetSuppressIncludeNotFoundError() {
970     return SuppressIncludeNotFoundError;
971   }
972 
  /// Sets whether the preprocessor is responsible for producing output or if
  /// it is producing tokens to be consumed by Parse and Sema.
  void setPreprocessedOutput(bool IsPreprocessedOutput) {
    PreprocessedOutput = IsPreprocessedOutput;
  }

  /// Returns true if the preprocessor is responsible for generating output,
  /// false if it is producing tokens to be consumed by Parse and Sema.
  bool isPreprocessedOutput() const { return PreprocessedOutput; }

  /// Return true if we are lexing directly from the specified lexer.
  ///
  /// This is a pointer-identity comparison of \p L against the current
  /// preprocessor lexer.
  bool isCurrentLexer(const PreprocessorLexer *L) const {
    return CurPPLexer == L;
  }

  /// Return the current lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }

  /// Return the current file lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  PreprocessorLexer *getCurrentFileLexer() const;

  /// Return the submodule owning the file being lexed. This may not be
  /// the current module if we have changed modules since entering the file.
  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }

  /// Returns the FileID for the preprocessor predefines.
  FileID getPredefinesFileID() const { return PredefinesFileID; }
1006 
1007   /// \{
1008   /// Accessors for preprocessor callbacks.
1009   ///
1010   /// Note that this class takes ownership of any PPCallbacks object given to
1011   /// it.
getPPCallbacks()1012   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)1013   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1014     if (Callbacks)
1015       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1016                                                 std::move(Callbacks));
1017     Callbacks = std::move(C);
1018   }
1019   /// \}
1020 
  /// Get the number of tokens processed so far.
  unsigned getTokenCount() const { return TokenCount; }

  /// Get the max number of tokens before issuing a -Wmax-tokens warning.
  ///
  /// overrideMaxTokens() replaces this value.
  unsigned getMaxTokens() const { return MaxTokens; }
1026 
overrideMaxTokens(unsigned Value,SourceLocation Loc)1027   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1028     MaxTokens = Value;
1029     MaxTokensOverrideLoc = Loc;
1030   };
1031 
getMaxTokensOverrideLoc()1032   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1033 
  /// Register a function that would be called on each token in the final
  /// expanded token stream.
  /// This also reports annotation tokens produced by the parser.
  ///
  /// The callable is taken by value and moved into the preprocessor,
  /// replacing any watcher registered earlier.
  void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
    OnToken = std::move(F);
  }
1040 
isMacroDefined(StringRef Id)1041   bool isMacroDefined(StringRef Id) {
1042     return isMacroDefined(&Identifiers.get(Id));
1043   }
isMacroDefined(const IdentifierInfo * II)1044   bool isMacroDefined(const IdentifierInfo *II) {
1045     return II->hasMacroDefinition() &&
1046            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1047   }
1048 
1049   /// Determine whether II is defined as a macro within the module M,
1050   /// if that is a module that we've already preprocessed. Does not check for
1051   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1052   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1053     if (!II->hasMacroDefinition())
1054       return false;
1055     auto I = Submodules.find(M);
1056     if (I == Submodules.end())
1057       return false;
1058     auto J = I->second.Macros.find(II);
1059     if (J == I->second.Macros.end())
1060       return false;
1061     auto *MD = J->second.getLatest();
1062     return MD && MD->isDefined();
1063   }
1064 
getMacroDefinition(const IdentifierInfo * II)1065   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1066     if (!II->hasMacroDefinition())
1067       return {};
1068 
1069     MacroState &S = CurSubmoduleState->Macros[II];
1070     auto *MD = S.getLatest();
1071     while (MD && isa<VisibilityMacroDirective>(MD))
1072       MD = MD->getPrevious();
1073     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1074                            S.getActiveModuleMacros(*this, II),
1075                            S.isAmbiguous(*this, II));
1076   }
1077 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1078   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1079                                           SourceLocation Loc) {
1080     if (!II->hadMacroDefinition())
1081       return {};
1082 
1083     MacroState &S = CurSubmoduleState->Macros[II];
1084     MacroDirective::DefInfo DI;
1085     if (auto *MD = S.getLatest())
1086       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1087     // FIXME: Compute the set of active module macros at the specified location.
1088     return MacroDefinition(DI.getDirective(),
1089                            S.getActiveModuleMacros(*this, II),
1090                            S.isAmbiguous(*this, II));
1091   }
1092 
1093   /// Given an identifier, return its latest non-imported MacroDirective
1094   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)1095   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1096     if (!II->hasMacroDefinition())
1097       return nullptr;
1098 
1099     auto *MD = getLocalMacroDirectiveHistory(II);
1100     if (!MD || MD->getDefinition().isUndefined())
1101       return nullptr;
1102 
1103     return MD;
1104   }
1105 
getMacroInfo(const IdentifierInfo * II)1106   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1107     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1108   }
1109 
getMacroInfo(const IdentifierInfo * II)1110   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1111     if (!II->hasMacroDefinition())
1112       return nullptr;
1113     if (auto MD = getMacroDefinition(II))
1114       return MD.getMacroInfo();
1115     return nullptr;
1116   }
1117 
1118   /// Given an identifier, return the latest non-imported macro
1119   /// directive for that identifier.
1120   ///
1121   /// One can iterate over all previous macro directives from the most recent
1122   /// one.
1123   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1124 
1125   /// Add a directive to the macro directive history for this identifier.
1126   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1127   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1128                                              SourceLocation Loc) {
1129     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1130     appendMacroDirective(II, MD);
1131     return MD;
1132   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1133   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1134                                              MacroInfo *MI) {
1135     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1136   }
1137 
1138   /// Set a MacroDirective that was loaded from a PCH file.
1139   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1140                                MacroDirective *MD);
1141 
1142   /// Register an exported macro for a module and identifier.
1143   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1144                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1145   ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
1146 
1147   /// Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)1148   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1149     if (II->isOutOfDate())
1150       updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1151     auto I = LeafModuleMacros.find(II);
1152     if (I != LeafModuleMacros.end())
1153       return I->second;
1154     return None;
1155   }
1156 
1157   /// \{
1158   /// Iterators for the macro history table. Currently defined macros have
1159   /// IdentifierInfo::hasMacroDefinition() set and an empty
1160   /// MacroInfo::getUndefLoc() at the head of the list.
1161   using macro_iterator = MacroMap::const_iterator;
1162 
1163   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1164   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1165 
1166   llvm::iterator_range<macro_iterator>
1167   macros(bool IncludeExternalMacros = true) const {
1168     macro_iterator begin = macro_begin(IncludeExternalMacros);
1169     macro_iterator end = macro_end(IncludeExternalMacros);
1170     return llvm::make_range(begin, end);
1171   }
1172 
1173   /// \}
1174 
1175   /// Return the name of the macro defined before \p Loc that has
1176   /// spelling \p Tokens.  If there are multiple macros with same spelling,
1177   /// return the last one defined.
1178   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1179                                      ArrayRef<TokenValue> Tokens) const;
1180 
getPredefines()1181   const std::string &getPredefines() const { return Predefines; }
1182 
1183   /// Set the predefines for this Preprocessor.
1184   ///
1185   /// These predefines are automatically injected when parsing the main file.
setPredefines(const char * P)1186   void setPredefines(const char *P) { Predefines = P; }
setPredefines(StringRef P)1187   void setPredefines(StringRef P) { Predefines = std::string(P); }
1188 
1189   /// Return information about the specified preprocessor
1190   /// identifier token.
getIdentifierInfo(StringRef Name)1191   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1192     return &Identifiers.get(Name);
1193   }
1194 
  /// Add the specified pragma handler to this preprocessor.
  ///
  /// If \p Namespace is non-null, then it is a token required to exist on the
  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
  /// Convenience overload: adds \p Handler with an empty namespace.
  void AddPragmaHandler(PragmaHandler *Handler) {
    AddPragmaHandler(StringRef(), Handler);
  }

  /// Remove the specific pragma handler from this preprocessor.
  ///
  /// If \p Namespace is non-null, then it should be the namespace that
  /// \p Handler was added to. It is an error to remove a handler that
  /// has not been registered.
  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
  /// Convenience overload: removes \p Handler from the empty namespace.
  void RemovePragmaHandler(PragmaHandler *Handler) {
    RemovePragmaHandler(StringRef(), Handler);
  }
1213 
1214   /// Install empty handlers for all pragmas (making them ignored).
1215   void IgnorePragmas();
1216 
1217   /// Add the specified comment handler to the preprocessor.
1218   void addCommentHandler(CommentHandler *Handler);
1219 
1220   /// Remove the specified comment handler.
1221   ///
1222   /// It is an error to remove a handler that has not been registered.
1223   void removeCommentHandler(CommentHandler *Handler);
1224 
  /// Set the code completion handler to the given object.
  ///
  /// Only the address of \p Handler is stored.
  void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
    CodeComplete = &Handler;
  }

  /// Retrieve the current code-completion handler, or null if none is set
  /// (clearCodeCompletionHandler() resets it to null).
  CodeCompletionHandler *getCodeCompletionHandler() const {
    return CodeComplete;
  }

  /// Clear out the code completion handler.
  void clearCodeCompletionHandler() {
    CodeComplete = nullptr;
  }
1239 
1240   /// Hook used by the lexer to invoke the "included file" code
1241   /// completion point.
1242   void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1243 
1244   /// Hook used by the lexer to invoke the "natural language" code
1245   /// completion point.
1246   void CodeCompleteNaturalLanguage();
1247 
  /// Set the code completion token for filtering purposes.
  ///
  /// The name of this identifier is what getCodeCompletionFilter() returns.
  void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
    CodeCompletionII = Filter;
  }

  /// Set the code completion token range for detecting replacement range later
  /// on.
  void setCodeCompletionTokenRange(const SourceLocation Start,
                                   const SourceLocation End) {
    CodeCompletionTokenRange = {Start, End};
  }
  /// Retrieve the range stored by setCodeCompletionTokenRange().
  SourceRange getCodeCompletionTokenRange() const {
    return CodeCompletionTokenRange;
  }
1262 
1263   /// Get the code completion token for filtering purposes.
getCodeCompletionFilter()1264   StringRef getCodeCompletionFilter() {
1265     if (CodeCompletionII)
1266       return CodeCompletionII->getName();
1267     return {};
1268   }
1269 
  /// Retrieve the preprocessing record, or null if there is no
  /// preprocessing record (see createPreprocessingRecord()).
  PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1273 
1274   /// Create a new preprocessing record, which will keep track of
1275   /// all macro expansions, macro definitions, etc.
1276   void createPreprocessingRecord();
1277 
1278   /// Returns true if the FileEntry is the PCH through header.
1279   bool isPCHThroughHeader(const FileEntry *FE);
1280 
1281   /// True if creating a PCH with a through header.
1282   bool creatingPCHWithThroughHeader();
1283 
1284   /// True if using a PCH with a through header.
1285   bool usingPCHWithThroughHeader();
1286 
1287   /// True if creating a PCH with a #pragma hdrstop.
1288   bool creatingPCHWithPragmaHdrStop();
1289 
1290   /// True if using a PCH with a #pragma hdrstop.
1291   bool usingPCHWithPragmaHdrStop();
1292 
1293   /// Skip tokens until after the #include of the through header or
1294   /// until after a #pragma hdrstop.
1295   void SkipTokensWhileUsingPCH();
1296 
1297   /// Process directives while skipping until the through header or
1298   /// #pragma hdrstop is found.
1299   void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1300                                            SourceLocation HashLoc);
1301 
1302   /// Enter the specified FileID as the main source file,
1303   /// which implicitly adds the builtin defines etc.
1304   void EnterMainSourceFile();
1305 
1306   /// Inform the preprocessor callbacks that processing is complete.
1307   void EndSourceFile();
1308 
1309   /// Add a source file to the top of the include stack and
1310   /// start lexing tokens from it instead of the current buffer.
1311   ///
1312   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1313   bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1314                        SourceLocation Loc);
1315 
1316   /// Add a Macro to the top of the include stack and start lexing
1317   /// tokens from it instead of the current buffer.
1318   ///
1319   /// \param Args specifies the tokens input to a function-like macro.
1320   /// \param ILEnd specifies the location of the ')' for a function-like macro
1321   /// or the identifier for an object-like macro.
1322   void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1323                   MacroArgs *Args);
1324 
1325 private:
1326   /// Add a "macro" context to the top of the include stack,
1327   /// which will cause the lexer to start returning the specified tokens.
1328   ///
1329   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1330   /// will not be subject to further macro expansion. Otherwise, these tokens
1331   /// will be re-macro-expanded when/if expansion is enabled.
1332   ///
1333   /// If \p OwnsTokens is false, this method assumes that the specified stream
1334   /// of tokens has a permanent owner somewhere, so they do not need to be
1335   /// copied. If it is true, it assumes the array of tokens is allocated with
1336   /// \c new[] and the Preprocessor will delete[] it.
1337   ///
1338   /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1339   /// set, see the flag documentation for details.
1340   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1341                         bool DisableMacroExpansion, bool OwnsTokens,
1342                         bool IsReinject);
1343 
1344 public:
EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1345   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1346                         bool DisableMacroExpansion, bool IsReinject) {
1347     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1348                      IsReinject);
1349   }
1350 
EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1351   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1352                         bool IsReinject) {
1353     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1354                      IsReinject);
1355   }
1356 
1357   /// Pop the current lexer/macro exp off the top of the lexer stack.
1358   ///
1359   /// This should only be used in situations where the current state of the
1360   /// top-of-stack lexer is known.
1361   void RemoveTopOfLexerStack();
1362 
1363   /// From the point that this method is called, and until
1364   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1365   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1366   /// make the Preprocessor re-lex the same tokens.
1367   ///
1368   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1369   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1370   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1371   ///
1372   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1373   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1374   /// tokens will continue indefinitely.
1375   ///
1376   void EnableBacktrackAtThisPos();
1377 
1378   /// Disable the last EnableBacktrackAtThisPos call.
1379   void CommitBacktrackedTokens();
1380 
1381   /// Make Preprocessor re-lex the tokens that were lexed since
1382   /// EnableBacktrackAtThisPos() was previously called.
1383   void Backtrack();
1384 
  /// True if EnableBacktrackAtThisPos() was called and
  /// caching of tokens is on, i.e. at least one backtrack position is still
  /// on the stack.
  bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1388 
1389   /// Lex the next token for this preprocessor.
1390   void Lex(Token &Result);
1391 
1392   /// Lex a token, forming a header-name token if possible.
1393   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1394 
1395   bool LexAfterModuleImport(Token &Result);
1396   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1397 
1398   void makeModuleVisible(Module *M, SourceLocation Loc);
1399 
getModuleImportLoc(Module * M)1400   SourceLocation getModuleImportLoc(Module *M) const {
1401     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1402   }
1403 
1404   /// Lex a string literal, which may be the concatenation of multiple
1405   /// string literals and may even come from macro expansion.
1406   /// \returns true on success, false if a error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1407   bool LexStringLiteral(Token &Result, std::string &String,
1408                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1409     if (AllowMacroExpansion)
1410       Lex(Result);
1411     else
1412       LexUnexpandedToken(Result);
1413     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1414                                   AllowMacroExpansion);
1415   }
1416 
1417   /// Complete the lexing of a string literal where the first token has
1418   /// already been lexed (see LexStringLiteral).
1419   bool FinishLexStringLiteral(Token &Result, std::string &String,
1420                               const char *DiagnosticTag,
1421                               bool AllowMacroExpansion);
1422 
1423   /// Lex a token.  If it's a comment, keep lexing until we get
1424   /// something not a comment.
1425   ///
1426   /// This is useful in -E -C mode where comments would foul up preprocessor
1427   /// directive handling.
LexNonComment(Token & Result)1428   void LexNonComment(Token &Result) {
1429     do
1430       Lex(Result);
1431     while (Result.getKind() == tok::comment);
1432   }
1433 
1434   /// Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1435   void LexUnexpandedToken(Token &Result) {
1436     // Disable macro expansion.
1437     bool OldVal = DisableMacroExpansion;
1438     DisableMacroExpansion = true;
1439     // Lex the token.
1440     Lex(Result);
1441 
1442     // Reenable it.
1443     DisableMacroExpansion = OldVal;
1444   }
1445 
1446   /// Like LexNonComment, but this disables macro expansion of
1447   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1448   void LexUnexpandedNonComment(Token &Result) {
1449     do
1450       LexUnexpandedToken(Result);
1451     while (Result.getKind() == tok::comment);
1452   }
1453 
1454   /// Parses a simple integer literal to get its numeric value.  Floating
1455   /// point literals and user defined literals are rejected.  Used primarily to
1456   /// handle pragmas that accept integer arguments.
1457   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1458 
1459   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1460   void SetMacroExpansionOnlyInDirectives() {
1461     DisableMacroExpansion = true;
1462     MacroExpansionInDirectivesOverride = true;
1463   }
1464 
1465   /// Peeks ahead N tokens and returns that token without consuming any
1466   /// tokens.
1467   ///
1468   /// LookAhead(0) returns the next token that would be returned by Lex(),
1469   /// LookAhead(1) returns the token after it, etc.  This returns normal
1470   /// tokens after phase 5.  As such, it is equivalent to using
1471   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1472   const Token &LookAhead(unsigned N) {
1473     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1474     if (CachedLexPos + N < CachedTokens.size())
1475       return CachedTokens[CachedLexPos+N];
1476     else
1477       return PeekAhead(N+1);
1478   }
1479 
  /// When backtracking is enabled and tokens are cached,
  /// this allows to revert a specific number of tokens.
  ///
  /// Note that the number of tokens being reverted should be up to the last
  /// backtrack position, not more.
  ///
  /// The preconditions are only checked by assertions; the function itself
  /// just moves the cached lex position back by \p N.
  void RevertCachedTokens(unsigned N) {
    assert(isBacktrackEnabled() &&
           "Should only be called when tokens are cached for backtracking");
    // The operands are cast to signed so that an over-large N produces a
    // negative difference instead of wrapping around in unsigned arithmetic.
    assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
         && "Should revert tokens up to the last backtrack position, not more");
    assert(signed(CachedLexPos) - signed(N) >= 0 &&
           "Corrupted backtrack positions ?");
    CachedLexPos -= N;
  }
1494 
1495   /// Enters a token in the token stream to be lexed next.
1496   ///
1497   /// If BackTrack() is called afterwards, the token will remain at the
1498   /// insertion point.
1499   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1500   /// flag set. See the flag documentation for details.
EnterToken(const Token & Tok,bool IsReinject)1501   void EnterToken(const Token &Tok, bool IsReinject) {
1502     if (LexLevel) {
1503       // It's not correct in general to enter caching lex mode while in the
1504       // middle of a nested lexing action.
1505       auto TokCopy = std::make_unique<Token[]>(1);
1506       TokCopy[0] = Tok;
1507       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1508     } else {
1509       EnterCachingLexMode();
1510       assert(IsReinject && "new tokens in the middle of cached stream");
1511       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1512     }
1513   }
1514 
1515   /// We notify the Preprocessor that if it is caching tokens (because
1516   /// backtrack is enabled) it should replace the most recent cached tokens
1517   /// with the given annotation token. This function has no effect if
1518   /// backtracking is not enabled.
1519   ///
  /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
  /// invoked.
AnnotateCachedTokens(const Token & Tok)1523   void AnnotateCachedTokens(const Token &Tok) {
1524     assert(Tok.isAnnotation() && "Expected annotation token");
1525     if (CachedLexPos != 0 && isBacktrackEnabled())
1526       AnnotatePreviousCachedTokens(Tok);
1527   }
1528 
1529   /// Get the location of the last cached token, suitable for setting the end
1530   /// location of an annotation token.
getLastCachedTokenLocation()1531   SourceLocation getLastCachedTokenLocation() const {
1532     assert(CachedLexPos != 0);
1533     return CachedTokens[CachedLexPos-1].getLastLoc();
1534   }
1535 
1536   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1537   /// CachedTokens.
1538   bool IsPreviousCachedToken(const Token &Tok) const;
1539 
1540   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1541   /// in \p NewToks.
1542   ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
1545   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1546 
1547   /// Replace the last token with an annotation token.
1548   ///
1549   /// Like AnnotateCachedTokens(), this routine replaces an
1550   /// already-parsed (and resolved) token with an annotation
1551   /// token. However, this routine only replaces the last token with
1552   /// the annotation token; it does not affect any other cached
1553   /// tokens. This function has no effect if backtracking is not
1554   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1555   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1556     assert(Tok.isAnnotation() && "Expected annotation token");
1557     if (CachedLexPos != 0 && isBacktrackEnabled())
1558       CachedTokens[CachedLexPos-1] = Tok;
1559   }
1560 
1561   /// Enter an annotation token into the token stream.
1562   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1563                             void *AnnotationVal);
1564 
1565   /// Determine whether it's possible for a future call to Lex to produce an
1566   /// annotation token created by a previous call to EnterAnnotationToken.
mightHavePendingAnnotationTokens()1567   bool mightHavePendingAnnotationTokens() {
1568     return CurLexerKind != CLK_Lexer;
1569   }
1570 
1571   /// Update the current token to represent the provided
1572   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1573   void TypoCorrectToken(const Token &Tok) {
1574     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1575     if (CachedLexPos != 0 && isBacktrackEnabled())
1576       CachedTokens[CachedLexPos-1] = Tok;
1577   }
1578 
1579   /// Recompute the current lexer kind based on the CurLexer/
1580   /// CurTokenLexer pointers.
1581   void recomputeCurLexerKind();
1582 
1583   /// Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1584   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1585 
  /// Enables (or, if \p value is false, disables) incremental processing.
1587   void enableIncrementalProcessing(bool value = true) {
1588     IncrementalProcessing = value;
1589   }
1590 
1591   /// Specify the point at which code-completion will be performed.
1592   ///
1593   /// \param File the file in which code completion should occur. If
1594   /// this file is included multiple times, code-completion will
1595   /// perform completion the first time it is included. If NULL, this
1596   /// function clears out the code-completion point.
1597   ///
1598   /// \param Line the line at which code completion should occur
1599   /// (1-based).
1600   ///
1601   /// \param Column the column at which code completion should occur
1602   /// (1-based).
1603   ///
1604   /// \returns true if an error occurred, false otherwise.
1605   bool SetCodeCompletionPoint(const FileEntry *File,
1606                               unsigned Line, unsigned Column);
1607 
1608   /// Determine if we are performing code completion.
isCodeCompletionEnabled()1609   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1610 
1611   /// Returns the location of the code-completion point.
1612   ///
1613   /// Returns an invalid location if code-completion is not enabled or the file
1614   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()1615   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1616 
1617   /// Returns the start location of the file of code-completion point.
1618   ///
1619   /// Returns an invalid location if code-completion is not enabled or the file
1620   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()1621   SourceLocation getCodeCompletionFileLoc() const {
1622     return CodeCompletionFileLoc;
1623   }
1624 
1625   /// Returns true if code-completion is enabled and we have hit the
1626   /// code-completion point.
isCodeCompletionReached()1627   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1628 
1629   /// Note that we hit the code-completion point.
setCodeCompletionReached()1630   void setCodeCompletionReached() {
1631     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1632     CodeCompletionReached = true;
1633     // Silence any diagnostics that occur after we hit the code-completion.
1634     getDiagnostics().setSuppressAllDiagnostics(true);
1635   }
1636 
1637   /// The location of the currently-active \#pragma clang
1638   /// arc_cf_code_audited begin.
1639   ///
1640   /// Returns an invalid location if there is no such pragma active.
1641   std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo()1642   getPragmaARCCFCodeAuditedInfo() const {
1643     return PragmaARCCFCodeAuditedInfo;
1644   }
1645 
1646   /// Set the location of the currently-active \#pragma clang
1647   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1648   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1649                                      SourceLocation Loc) {
1650     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1651   }
1652 
1653   /// The location of the currently-active \#pragma clang
1654   /// assume_nonnull begin.
1655   ///
1656   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()1657   SourceLocation getPragmaAssumeNonNullLoc() const {
1658     return PragmaAssumeNonNullLoc;
1659   }
1660 
1661   /// Set the location of the currently-active \#pragma clang
1662   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)1663   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1664     PragmaAssumeNonNullLoc = Loc;
1665   }
1666 
1667   /// Set the directory in which the main file should be considered
1668   /// to have been found, if it is not a real file.
setMainFileDir(const DirectoryEntry * Dir)1669   void setMainFileDir(const DirectoryEntry *Dir) {
1670     MainFileDir = Dir;
1671   }
1672 
1673   /// Instruct the preprocessor to skip part of the main source file.
1674   ///
1675   /// \param Bytes The number of bytes in the preamble to skip.
1676   ///
1677   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1678   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1679   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1680     SkipMainFilePreamble.first = Bytes;
1681     SkipMainFilePreamble.second = StartOfLine;
1682   }
1683 
1684   /// Forwarding function for diagnostics.  This emits a diagnostic at
1685   /// the specified Token's location, translating the token's start
1686   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)1687   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1688     return Diags->Report(Loc, DiagID);
1689   }
1690 
Diag(const Token & Tok,unsigned DiagID)1691   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1692     return Diags->Report(Tok.getLocation(), DiagID);
1693   }
1694 
1695   /// Return the 'spelling' of the token at the given
1696   /// location; does not go up to the spelling location or down to the
1697   /// expansion location.
1698   ///
1699   /// \param buffer A buffer which will be used only if the token requires
1700   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1701   /// \param invalid If non-null, will be set \c true if an error occurs.
1702   StringRef getSpelling(SourceLocation loc,
1703                         SmallVectorImpl<char> &buffer,
1704                         bool *invalid = nullptr) const {
1705     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1706   }
1707 
1708   /// Return the 'spelling' of the Tok token.
1709   ///
1710   /// The spelling of a token is the characters used to represent the token in
1711   /// the source file after trigraph expansion and escaped-newline folding.  In
1712   /// particular, this wants to get the true, uncanonicalized, spelling of
1713   /// things like digraphs, UCNs, etc.
1714   ///
1715   /// \param Invalid If non-null, will be set \c true if an error occurs.
1716   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1717     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1718   }
1719 
1720   /// Get the spelling of a token into a preallocated buffer, instead
1721   /// of as an std::string.
1722   ///
1723   /// The caller is required to allocate enough space for the token, which is
1724   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1725   /// actual result is returned.
1726   ///
1727   /// Note that this method may do two possible things: it may either fill in
1728   /// the buffer specified with characters, or it may *change the input pointer*
1729   /// to point to a constant buffer with the data already in it (avoiding a
1730   /// copy).  The caller is not allowed to modify the returned buffer pointer
1731   /// if an internal buffer is returned.
1732   unsigned getSpelling(const Token &Tok, const char *&Buffer,
1733                        bool *Invalid = nullptr) const {
1734     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1735   }
1736 
1737   /// Get the spelling of a token into a SmallVector.
1738   ///
1739   /// Note that the returned StringRef may not point to the
1740   /// supplied buffer if a copy can be avoided.
1741   StringRef getSpelling(const Token &Tok,
1742                         SmallVectorImpl<char> &Buffer,
1743                         bool *Invalid = nullptr) const;
1744 
1745   /// Relex the token at the specified location.
1746   /// \returns true if there was a failure, false on success.
1747   bool getRawToken(SourceLocation Loc, Token &Result,
1748                    bool IgnoreWhiteSpace = false) {
1749     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1750   }
1751 
1752   /// Given a Token \p Tok that is a numeric constant with length 1,
1753   /// return the character.
1754   char
1755   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1756                                               bool *Invalid = nullptr) const {
1757     assert(Tok.is(tok::numeric_constant) &&
1758            Tok.getLength() == 1 && "Called on unsupported token");
1759     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1760 
1761     // If the token is carrying a literal data pointer, just use it.
1762     if (const char *D = Tok.getLiteralData())
1763       return *D;
1764 
1765     // Otherwise, fall back on getCharacterData, which is slower, but always
1766     // works.
1767     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1768   }
1769 
1770   /// Retrieve the name of the immediate macro expansion.
1771   ///
1772   /// This routine starts from a source location, and finds the name of the
1773   /// macro responsible for its immediate expansion. It looks through any
1774   /// intervening macro argument expansions to compute this. It returns a
1775   /// StringRef that refers to the SourceManager-owned buffer of the source
1776   /// where that macro name is spelled. Thus, the result shouldn't out-live
1777   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)1778   StringRef getImmediateMacroName(SourceLocation Loc) {
1779     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1780   }
1781 
1782   /// Plop the specified string into a scratch buffer and set the
1783   /// specified token's location and length to it.
1784   ///
1785   /// If specified, the source location provides a location of the expansion
1786   /// point of the token.
1787   void CreateString(StringRef Str, Token &Tok,
1788                     SourceLocation ExpansionLocStart = SourceLocation(),
1789                     SourceLocation ExpansionLocEnd = SourceLocation());
1790 
1791   /// Split the first Length characters out of the token starting at TokLoc
1792   /// and return a location pointing to the split token. Re-lexing from the
1793   /// split token will return the split token rather than the original.
1794   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1795 
1796   /// Computes the source location just past the end of the
1797   /// token at this source location.
1798   ///
1799   /// This routine can be used to produce a source location that
1800   /// points just past the end of the token referenced by \p Loc, and
1801   /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
1803   /// the returned source location would not be meaningful (e.g., if
1804   /// it points into a macro), this routine returns an invalid
1805   /// source location.
1806   ///
1807   /// \param Offset an offset from the end of the token, where the source
1808   /// location should refer to. The default offset (0) produces a source
1809   /// location pointing just past the end of the token; an offset of 1 produces
1810   /// a source location pointing to the last character in the token, etc.
1811   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1812     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1813   }
1814 
1815   /// Returns true if the given MacroID location points at the first
1816   /// token of the macro expansion.
1817   ///
1818   /// \param MacroBegin If non-null and function returns true, it is set to
1819   /// begin location of the macro.
1820   bool isAtStartOfMacroExpansion(SourceLocation loc,
1821                                  SourceLocation *MacroBegin = nullptr) const {
1822     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1823                                             MacroBegin);
1824   }
1825 
1826   /// Returns true if the given MacroID location points at the last
1827   /// token of the macro expansion.
1828   ///
1829   /// \param MacroEnd If non-null and function returns true, it is set to
1830   /// end location of the macro.
1831   bool isAtEndOfMacroExpansion(SourceLocation loc,
1832                                SourceLocation *MacroEnd = nullptr) const {
1833     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1834   }
1835 
1836   /// Print the token to stderr, used for debugging.
1837   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1838   void DumpLocation(SourceLocation Loc) const;
1839   void DumpMacro(const MacroInfo &MI) const;
1840   void dumpMacroInfo(const IdentifierInfo *II);
1841 
1842   /// Given a location that specifies the start of a
1843   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1844   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1845                                          unsigned Char) const {
1846     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1847   }
1848 
1849   /// Increment the counters for the number of token paste operations
1850   /// performed.
1851   ///
1852   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)1853   void IncrementPasteCounter(bool isFast) {
1854     if (isFast)
1855       ++NumFastTokenPaste;
1856     else
1857       ++NumTokenPaste;
1858   }
1859 
1860   void PrintStats();
1861 
1862   size_t getTotalMemory() const;
1863 
1864   /// When the macro expander pastes together a comment (/##/) in Microsoft
1865   /// mode, this method handles updating the current state, returning the
1866   /// token on the next source line.
1867   void HandleMicrosoftCommentPaste(Token &Tok);
1868 
1869   //===--------------------------------------------------------------------===//
1870   // Preprocessor callback methods.  These are invoked by a lexer as various
1871   // directives and events are found.
1872 
1873   /// Given a tok::raw_identifier token, look up the
1874   /// identifier information for the token and install it into the token,
1875   /// updating the token kind accordingly.
1876   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1877 
1878 private:
1879   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1880 
1881 public:
1882   /// Specifies the reason for poisoning an identifier.
1883   ///
1884   /// If that identifier is accessed while poisoned, then this reason will be
1885   /// used instead of the default "poisoned" diagnostic.
1886   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1887 
1888   /// Display reason for poisoned identifier.
1889   void HandlePoisonedIdentifier(Token & Identifier);
1890 
MaybeHandlePoisonedIdentifier(Token & Identifier)1891   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1892     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1893       if(II->isPoisoned()) {
1894         HandlePoisonedIdentifier(Identifier);
1895       }
1896     }
1897   }
1898 
1899 private:
1900   /// Identifiers used for SEH handling in Borland. These are only
1901   /// allowed in particular circumstances
1902   // __except block
1903   IdentifierInfo *Ident__exception_code,
1904                  *Ident___exception_code,
1905                  *Ident_GetExceptionCode;
1906   // __except filter expression
1907   IdentifierInfo *Ident__exception_info,
1908                  *Ident___exception_info,
1909                  *Ident_GetExceptionInfo;
1910   // __finally
1911   IdentifierInfo *Ident__abnormal_termination,
1912                  *Ident___abnormal_termination,
1913                  *Ident_AbnormalTermination;
1914 
1915   const char *getCurLexerEndPos();
1916   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1917 
1918 public:
1919   void PoisonSEHIdentifiers(bool Poison = true); // Borland
1920 
1921   /// Callback invoked when the lexer reads an identifier and has
1922   /// filled in the tokens IdentifierInfo member.
1923   ///
1924   /// This callback potentially macro expands it or turns it into a named
1925   /// token (like 'for').
1926   ///
1927   /// \returns true if we actually computed a token, false if we need to
1928   /// lex again.
1929   bool HandleIdentifier(Token &Identifier);
1930 
1931   /// Callback invoked when the lexer hits the end of the current file.
1932   ///
1933   /// This either returns the EOF token and returns true, or
1934   /// pops a level off the include stack and returns false, at which point the
1935   /// client should call lex again.
1936   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1937 
1938   /// Callback invoked when the current TokenLexer hits the end of its
1939   /// token stream.
1940   bool HandleEndOfTokenLexer(Token &Result);
1941 
1942   /// Callback invoked when the lexer sees a # token at the start of a
1943   /// line.
1944   ///
1945   /// This consumes the directive, modifies the lexer/preprocessor state, and
1946   /// advances the lexer(s) so that the next token read is the correct one.
1947   void HandleDirective(Token &Result);
1948 
1949   /// Ensure that the next token is a tok::eod token.
1950   ///
1951   /// If not, emit a diagnostic and consume up until the eod.
1952   /// If \p EnableMacros is true, then we consider macros that expand to zero
1953   /// tokens as being ok.
1954   ///
1955   /// \return The location of the end of the directive (the terminating
1956   /// newline).
1957   SourceLocation CheckEndOfDirective(const char *DirType,
1958                                      bool EnableMacros = false);
1959 
1960   /// Read and discard all tokens remaining on the current line until
1961   /// the tok::eod token is found. Returns the range of the skipped tokens.
1962   SourceRange DiscardUntilEndOfDirective();
1963 
1964   /// Returns true if the preprocessor has seen a use of
1965   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()1966   bool SawDateOrTime() const {
1967     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1968   }
getCounterValue()1969   unsigned getCounterValue() const { return CounterValue; }
setCounterValue(unsigned V)1970   void setCounterValue(unsigned V) { CounterValue = V; }
1971 
1972   /// Retrieves the module that we're currently building, if any.
1973   Module *getCurrentModule();
1974 
1975   /// Allocate a new MacroInfo object with the provided SourceLocation.
1976   MacroInfo *AllocateMacroInfo(SourceLocation L);
1977 
1978   /// Turn the specified lexer token into a fully checked and spelled
1979   /// filename, e.g. as an operand of \#include.
1980   ///
1981   /// The caller is expected to provide a buffer that is large enough to hold
1982   /// the spelling of the filename, but is also expected to handle the case
1983   /// when this method decides to use a different buffer.
1984   ///
1985   /// \returns true if the input filename was in <>'s or false if it was
1986   /// in ""'s.
1987   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
1988 
1989   /// Given a "foo" or \<foo> reference, look up the indicated file.
1990   ///
1991   /// Returns None on failure.  \p isAngled indicates whether the file
1992   /// reference is for system \#include's or not (i.e. using <> instead of "").
1993   Optional<FileEntryRef>
1994   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
1995              const DirectoryLookup *FromDir, const FileEntry *FromFile,
1996              const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
1997              SmallVectorImpl<char> *RelativePath,
1998              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
1999              bool *IsFrameworkFound, bool SkipCache = false);
2000 
2001   /// Get the DirectoryLookup structure used to find the current
2002   /// FileEntry, if CurLexer is non-null and if applicable.
2003   ///
2004   /// This allows us to implement \#include_next and find directory-specific
2005   /// properties.
GetCurDirLookup()2006   const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
2007 
2008   /// Return true if we're in the top-level file, not in a \#include.
2009   bool isInPrimaryFile() const;
2010 
2011   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2012   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
2013   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2014 
2015   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2016                       bool *ShadowFlag = nullptr);
2017 
2018   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2019   Module *LeaveSubmodule(bool ForPragma);
2020 
2021 private:
2022   friend void TokenLexer::ExpandFunctionArguments();
2023 
PushIncludeMacroStack()2024   void PushIncludeMacroStack() {
2025     assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
2026     IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
2027                                    std::move(CurLexer), CurPPLexer,
2028                                    std::move(CurTokenLexer), CurDirLookup);
2029     CurPPLexer = nullptr;
2030   }
2031 
PopIncludeMacroStack()2032   void PopIncludeMacroStack() {
2033     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2034     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2035     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2036     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2037     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2038     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
2039     IncludeMacroStack.pop_back();
2040   }
2041 
2042   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2043 
2044   /// Determine whether we need to create module macros for #defines in the
2045   /// current context.
2046   bool needModuleMacros() const;
2047 
2048   /// Update the set of active module macros and ambiguity flag for a module
2049   /// macro name.
2050   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2051 
2052   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2053                                                SourceLocation Loc);
2054   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2055   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2056                                                              bool isPublic);
2057 
2058   /// Lex and validate a macro name, which occurs after a
2059   /// \#define or \#undef.
2060   ///
2061   /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive.
2063   /// \param ShadowFlag Points to flag that is set if macro name shadows
2064   ///                   a keyword.
2065   ///
2066   /// This emits a diagnostic, sets the token kind to eod,
2067   /// and discards the rest of the macro line if the macro name is invalid.
2068   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2069                      bool *ShadowFlag = nullptr);
2070 
2071   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2072   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2073   /// doing so performs certain validity checks including (but not limited to):
2074   ///   - # (stringization) is followed by a macro parameter
2075   /// \param MacroNameTok - Token that represents the macro name
2076   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2077   ///
2078   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2079   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2080   MacroInfo *ReadOptionalMacroParameterListAndBody(
2081       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2082 
2083   /// The ( starting an argument list of a macro definition has just been read.
2084   /// Lex the rest of the parameters and the closing ), updating \p MI with
2085   /// what we learn and saving in \p LastTok the last token read.
2086   /// Return true if an error occurs parsing the arg list.
2087   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2088 
2089   /// We just read a \#if or related directive and decided that the
2090   /// subsequent tokens are in the \#if'd out portion of the
2091   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2092   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2093   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2094   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2095   /// already seen one so a \#else directive is a duplicate.  When this returns,
2096   /// the caller can lex the first valid token.
2097   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2098                                     SourceLocation IfTokenLoc,
2099                                     bool FoundNonSkipPortion, bool FoundElse,
2100                                     SourceLocation ElseLoc = SourceLocation());
2101 
2102   /// Information about the result for evaluating an expression for a
2103   /// preprocessor directive.
  struct DirectiveEvalResult {
    /// True if the directive's expression evaluated to true, false otherwise.
    bool Conditional;

    /// True if the expression contained identifiers that were undefined.
    bool IncludedUndefinedIds;

    /// The source range covered by the evaluated expression.
    SourceRange ExprRange;
  };
2114 
2115   /// Evaluate an integer constant expression that may occur after a
2116   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2117   ///
2118   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2119   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2120 
2121   /// Install the standard preprocessor pragmas:
2122   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2123   void RegisterBuiltinPragmas();
2124 
2125   /// Register builtin macros such as __LINE__ with the identifier table.
2126   void RegisterBuiltinMacros();
2127 
2128   /// If an identifier token is read that is to be expanded as a macro, handle
2129   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
2130   /// otherwise the caller should lex again.
2131   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2132 
  /// Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are
  /// removed from the end of the cache.
2138   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2139                                   ArrayRef<Token> tokens);
2140 
2141   void removeCachedMacroExpandedTokensOfLastLexer();
2142 
  /// Determine whether the next preprocessor token to be lexed is a '('.
  ///
  /// If so, consume the token and return true; if not, this method should
  /// have no observable side-effect on the lexed tokens.
  bool isNextPPTokenLParen();
2147 
  /// After reading "MACRO(", this method is invoked to read all of the
  /// arguments specified for the macro invocation.  Returns null on error.
  ///
  /// NOTE(review): the parameter descriptions below are inferred from the
  /// names only; confirm against the out-of-line definition.
  ///
  /// \param MacroName Presumably the token naming the invoked macro.
  /// \param MI Presumably the MacroInfo of the invoked macro.
  /// \param MacroEnd [out] Presumably the location where the invocation ends.
  MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                       SourceLocation &MacroEnd);
2152 
  /// If an identifier token is read that is to be expanded as a builtin
  /// macro, handle it and return the next token as \p Tok.
  ///
  /// \param Tok [in,out] Receives the next token; presumably holds the
  ///        builtin macro's identifier token on entry (TODO confirm).
  void ExpandBuiltinMacro(Token &Tok);
2156 
  /// Read a \c _Pragma directive, slice it up, process it, then
  /// return the first token after the directive.
  ///
  /// This assumes that the \c _Pragma token has just been read into \p Tok.
  void Handle_Pragma(Token &Tok);

  /// Like Handle_Pragma() except the pragma text is not enclosed within
  /// a string literal.
  ///
  /// NOTE(review): per the method name this is the Microsoft \c __pragma
  /// form; confirm against the out-of-line definition.
  void HandleMicrosoft__pragma(Token &Tok);
2165 
  /// Add a lexer to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// \param TheLexer The lexer to add.  NOTE(review): passed as a raw
  ///        pointer; whether ownership is transferred is not visible here.
  /// \param Dir NOTE(review): presumably the directory lookup associated
  ///        with the file being entered -- confirm against the definition.
  void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2169 
  /// Set the FileID for the preprocessor predefines.
  ///
  /// Asserts that no valid predefines FileID has been stored yet, i.e. this
  /// is expected to be called at most once with a valid \p FID.
  void setPredefinesFileID(FileID FID) {
    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
    PredefinesFileID = FID;
  }

  /// Set the FileID for the PCH through header.
  void setPCHThroughHeaderFileID(FileID FID);
2178 
2179   /// Returns true if we are lexing from a file and not a
2180   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2181   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2182     return L ? !L->isPragmaLexer() : P != nullptr;
2183   }
2184 
IsFileLexer(const IncludeStackInfo & I)2185   static bool IsFileLexer(const IncludeStackInfo& I) {
2186     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2187   }
2188 
IsFileLexer()2189   bool IsFileLexer() const {
2190     return IsFileLexer(CurLexer.get(), CurPPLexer);
2191   }
2192 
  //===--------------------------------------------------------------------===//
  // Caching stuff.

  /// NOTE(review): defined out of line.  Judging by the name, this presumably
  /// produces the next token in \p Result while in caching-lex mode (see
  /// InCachingLexMode()); confirm against the definition.
  void CachingLex(Token &Result);
2196 
InCachingLexMode()2197   bool InCachingLexMode() const {
2198     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2199     // that we are past EOF, not that we are in CachingLex mode.
2200     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2201   }
2202 
  /// NOTE(review): both functions are defined out of line.  Judging by the
  /// names, they switch the preprocessor into caching-lex mode, and the
  /// "Unchecked" variant presumably skips a check that EnterCachingLexMode()
  /// performs first; confirm against the definitions.
  void EnterCachingLexMode();
  void EnterCachingLexModeUnchecked();
2205 
ExitCachingLexMode()2206   void ExitCachingLexMode() {
2207     if (InCachingLexMode())
2208       RemoveTopOfLexerStack();
2209   }
2210 
  /// NOTE(review): defined out of line.  Presumably returns the token \p N
  /// positions ahead of the current one without consuming it; whether \p N
  /// is zero- or one-based is not visible here -- confirm.
  const Token &PeekAhead(unsigned N);

  /// NOTE(review): defined out of line.  Presumably replaces previously
  /// cached tokens with the annotation token \p Tok; confirm against the
  /// definition.
  void AnnotatePreviousCachedTokens(const Token &Tok);
2213 
  //===--------------------------------------------------------------------===//
  // Handle*Directive - implement the various preprocessor directives.  These
  // should side-effect the current preprocessor object so that the next call
  // to Lex() will return the appropriate token next.
  //
  // NOTE(review): all of these are defined out of line; which directive each
  // one implements is only indicated by its name.
  void HandleLineDirective();
  void HandleDigitDirective(Token &Tok);
  // NOTE(review): isWarning presumably selects between a warning and an error
  // diagnostic -- confirm against the definition.
  void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
  void HandleIdentSCCSDirective(Token &Tok);
  void HandleMacroPublicDirective(Token &Tok);
  void HandleMacroPrivateDirective();
2224 
2225   /// An additional notification that can be produced by a header inclusion or
2226   /// import to tell the parser what happened.
2227   struct ImportAction {
2228     enum ActionKind {
2229       None,
2230       ModuleBegin,
2231       ModuleImport,
2232       SkippedModuleImport,
2233       Failure,
2234     } Kind;
2235     Module *ModuleForHeader = nullptr;
2236 
2237     ImportAction(ActionKind AK, Module *Mod = nullptr)
KindImportAction2238         : Kind(AK), ModuleForHeader(Mod) {
2239       assert((AK == None || Mod || AK == Failure) &&
2240              "no module for module action");
2241     }
2242   };
2243 
  /// NOTE(review): defined out of line and undocumented here.  Judging by the
  /// name and signature, this looks up the header named by \p Filename on
  /// behalf of an include or import and returns the file that was found, or
  /// an empty Optional if there is none.  The reference parameters are
  /// presumably updated as part of the lookup; confirm all of this against
  /// the definition.
  Optional<FileEntryRef> LookupHeaderIncludeOrImport(
      const DirectoryLookup *&CurDir, StringRef &Filename,
      SourceLocation FilenameLoc, CharSourceRange FilenameRange,
      const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
      bool &IsMapped, const DirectoryLookup *LookupFrom,
      const FileEntry *LookupFromFile, StringRef &LookupFilename,
      SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
      ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2252 
  // File inclusion.
  //
  // NOTE(review): all handlers below are defined out of line; which directive
  // each one implements is only indicated by its name.  HashLoc is presumably
  // the location of the '#' that introduces the directive -- confirm.
  void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
                              const DirectoryLookup *LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  // Returns an ImportAction, the notification type declared above for telling
  // the parser what a header inclusion or import did.
  ImportAction
  HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
                              Token &FilenameTok, SourceLocation EndLoc,
                              const DirectoryLookup *LookupFrom = nullptr,
                              const FileEntry *LookupFromFile = nullptr);
  void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
  void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
  void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
  void HandleMicrosoftImportDirective(Token &Tok);
2266 
2267 public:
  /// Check that the given module is available, producing a diagnostic if not.
  ///
  /// \param M The module to check.
  /// \return \c true if the check failed (because the module is not available).
  ///         \c false if the module appears to be usable.
  static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                     const TargetInfo &TargetInfo,
                                     DiagnosticsEngine &Diags, Module *M);
2274 
  // Module inclusion testing.

  /// Find the module that owns the source or header file that
  /// \p Loc points to.
  ///
  /// \return The owning module, or nullptr if the location is in a file that
  ///         was included into a module or is outside any module.
  Module *getModuleForLocation(SourceLocation Loc);
2280 
  /// We want to produce a diagnostic at location IncLoc concerning an
  /// unreachable effect at location MLoc (e.g., where a desired entity was
  /// declared or defined). Determine whether the right way to make MLoc
  /// reachable is by \#include, and if so, what header should be included.
  ///
  /// This is not necessarily fast, and might load unexpected module maps, so
  /// should only be called by code that intends to produce an error.
  ///
  /// \param IncLoc The location at which the missing effect was detected.
  /// \param MLoc A location within an unimported module at which the desired
  ///        effect occurred.
  /// \return A file that can be \#included to provide the desired effect. Null
  ///         if no such file could be determined or if a \#include is not
  ///         appropriate (e.g., if a module should be imported instead).
  const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                    SourceLocation MLoc);
2297 
  /// Returns whether PreambleConditionalStack reports that it is recording.
  bool isRecordingPreamble() const {
    return PreambleConditionalStack.isRecording();
  }

  /// Returns whether PreambleConditionalStack reports that a preamble has
  /// been recorded.
  bool hasRecordedPreamble() const {
    return PreambleConditionalStack.hasRecordedPreamble();
  }
2305 
getPreambleConditionalStack()2306   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2307       return PreambleConditionalStack.getStack();
2308   }
2309 
  /// Store \p s as the stack held by PreambleConditionalStack.
  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.setStack(s);
  }
2313 
setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,llvm::Optional<PreambleSkipInfo> SkipInfo)2314   void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2315                                              llvm::Optional<PreambleSkipInfo> SkipInfo) {
2316     PreambleConditionalStack.startReplaying();
2317     PreambleConditionalStack.setStack(s);
2318     PreambleConditionalStack.SkipInfo = SkipInfo;
2319   }
2320 
  /// Returns a copy of the skip information stored in
  /// PreambleConditionalStack; the Optional may be empty.
  llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
    return PreambleConditionalStack.SkipInfo;
  }
2324 
2325 private:
  /// After processing the predefines file, initialize the conditional stack
  /// from the preamble.
  void replayPreambleConditionalStack();
2329 
  // NOTE(review): every handler in this group is defined out of line; which
  // directive each one implements is only indicated by its name.

  // Macro handling.
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  void HandleUndefDirective();

  // Conditional Inclusion.
  // NOTE(review): isIfndef presumably distinguishes #ifndef from #ifdef, and
  // HashToken is presumably the '#' token that introduced the directive --
  // confirm against the definitions.
  void HandleIfdefDirective(Token &Result, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &IfToken, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &EndifToken);
  void HandleElseDirective(Token &Result, const Token &HashToken);
  void HandleElifDirective(Token &ElifToken, const Token &HashToken);

  // Pragmas.
  void HandlePragmaDirective(PragmaIntroducer Introducer);
2345 
2346 public:
  // Handlers for individual pragmas.
  //
  // NOTE(review): all of these are defined out of line; which pragma each one
  // implements is only indicated by its name.
  void HandlePragmaOnce(Token &OnceTok);
  void HandlePragmaMark();
  void HandlePragmaPoison();
  void HandlePragmaSystemHeader(Token &SysHeaderTok);
  void HandlePragmaDependency(Token &DependencyTok);
  void HandlePragmaPushMacro(Token &Tok);
  void HandlePragmaPopMacro(Token &Tok);
  void HandlePragmaIncludeAlias(Token &Tok);
  void HandlePragmaModuleBuild(Token &Tok);
  void HandlePragmaHdrstop(Token &Tok);
  // NOTE(review): presumably the parsing helper shared by the push_macro and
  // pop_macro handlers; what a null result means is not visible here.
  IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2358 
  /// Return true and store the first token only if any CommentHandler
  /// has inserted some tokens and getCommentRetentionState() is false.
  ///
  /// \param result [out] Receives the first token in the case described above.
  /// \param Comment The source range of the comment.
  bool HandleComment(Token &result, SourceRange Comment);
2362 
  /// Note that the macro \p MI is used, and update the information about
  /// macros that need unused warnings.
  void markMacroAsUsed(MacroInfo *MI);
2366 
2367 private:
  /// NOTE(review): defined out of line and undocumented here.  Presumably
  /// looks up, in ExcludedConditionalDirectiveSkipMappings, the range to skip
  /// for the excluded conditional block whose directive starts at \p HashLoc,
  /// returning an empty Optional when there is none; the meaning of the
  /// unsigned value is not visible here -- confirm against the definition.
  Optional<unsigned>
  getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);

  /// Contains the currently active skipped range mappings for skipping excluded
  /// conditional directives.
  ExcludedPreprocessorDirectiveSkipMapping
      *ExcludedConditionalDirectiveSkipMappings;
2375 };
2376 
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  virtual ~CommentHandler();

  /// Receives the source range \p Comment of a comment encountered by \p PP.
  ///
  /// The handler shall return true if it has pushed any tokens
  /// to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2387 
/// Registry of pragma handlers added by plugins; an alias for
/// llvm::Registry<PragmaHandler>.
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2390 
2391 } // namespace clang
2392 
2393 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
2394