1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/IdentifierTable.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/Module.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/ModuleLoader.h"
28 #include "clang/Lex/ModuleMap.h"
29 #include "clang/Lex/PPCallbacks.h"
30 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
31 #include "clang/Lex/Token.h"
32 #include "clang/Lex/TokenLexer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/FoldingSet.h"
36 #include "llvm/ADT/FunctionExtras.h"
37 #include "llvm/ADT/None.h"
38 #include "llvm/ADT/Optional.h"
39 #include "llvm/ADT/PointerUnion.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/ADT/TinyPtrVector.h"
45 #include "llvm/ADT/iterator_range.h"
46 #include "llvm/Support/Allocator.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Registry.h"
49 #include <cassert>
50 #include <cstddef>
51 #include <cstdint>
52 #include <map>
53 #include <memory>
54 #include <string>
55 #include <utility>
56 #include <vector>
57 
58 namespace llvm {
59 
60 template<unsigned InternalLen> class SmallString;
61 
62 } // namespace llvm
63 
64 namespace clang {
65 
66 class CodeCompletionHandler;
67 class CommentHandler;
68 class DirectoryEntry;
69 class DirectoryLookup;
70 class EmptylineHandler;
71 class ExternalPreprocessorSource;
72 class FileEntry;
73 class FileManager;
74 class HeaderSearch;
75 class MacroArgs;
76 class PragmaHandler;
77 class PragmaNamespace;
78 class PreprocessingRecord;
79 class PreprocessorLexer;
80 class PreprocessorOptions;
81 class ScratchBuffer;
82 class TargetInfo;
83 
84 namespace Builtin {
85 class Context;
86 }
87 
88 /// Stores token information for comparing actual tokens with
89 /// predefined values.  Only handles simple tokens and identifiers.
90 class TokenValue {
91   tok::TokenKind Kind;
92   IdentifierInfo *II;
93 
94 public:
TokenValue(tok::TokenKind Kind)95   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
96     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
97     assert(Kind != tok::identifier &&
98            "Identifiers should be created by TokenValue(IdentifierInfo *)");
99     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
100     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
101   }
102 
TokenValue(IdentifierInfo * II)103   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
104 
105   bool operator==(const Token &Tok) const {
106     return Tok.getKind() == Kind &&
107         (!II || II == Tok.getIdentifierInfo());
108   }
109 };
110 
/// Context in which a macro name is used.
enum MacroUse {
  /// Any use other than in a \#define or \#undef directive.
  MU_Other = 0,

  /// The macro name being specified in a \#define directive.
  MU_Define = 1,

  /// The macro name being specified in an \#undef directive.
  MU_Undef = 2
};
122 
123 /// Engages in a tight little dance with the lexer to efficiently
124 /// preprocess tokens.
125 ///
126 /// Lexers know only about tokens within a single source file, and don't
127 /// know anything about preprocessor-level issues like the \#include stack,
128 /// token expansion, etc.
129 class Preprocessor {
130   friend class VAOptDefinitionContext;
131   friend class VariadicMacroScopeGuard;
132 
133   llvm::unique_function<void(const clang::Token &)> OnToken;
134   std::shared_ptr<PreprocessorOptions> PPOpts;
135   DiagnosticsEngine        *Diags;
136   LangOptions       &LangOpts;
137   const TargetInfo *Target = nullptr;
138   const TargetInfo *AuxTarget = nullptr;
139   FileManager       &FileMgr;
140   SourceManager     &SourceMgr;
141   std::unique_ptr<ScratchBuffer> ScratchBuf;
142   HeaderSearch      &HeaderInfo;
143   ModuleLoader      &TheModuleLoader;
144 
145   /// External source of macros.
146   ExternalPreprocessorSource *ExternalSource;
147 
148   /// A BumpPtrAllocator object used to quickly allocate and release
149   /// objects internal to the Preprocessor.
150   llvm::BumpPtrAllocator BP;
151 
152   /// Identifiers for builtin macros and other builtins.
153   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
154   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
155   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
156   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
157   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
158   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
159   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
160   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
161   IdentifierInfo *Ident__identifier;               // __identifier
162   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
163   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
164   IdentifierInfo *Ident__has_feature;              // __has_feature
165   IdentifierInfo *Ident__has_extension;            // __has_extension
166   IdentifierInfo *Ident__has_builtin;              // __has_builtin
167   IdentifierInfo *Ident__has_attribute;            // __has_attribute
168   IdentifierInfo *Ident__has_include;              // __has_include
169   IdentifierInfo *Ident__has_include_next;         // __has_include_next
170   IdentifierInfo *Ident__has_warning;              // __has_warning
171   IdentifierInfo *Ident__is_identifier;            // __is_identifier
172   IdentifierInfo *Ident__building_module;          // __building_module
173   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
174   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
175   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
176   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
177   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
178   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
179   IdentifierInfo *Ident__is_target_os;             // __is_target_os
180   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
181 
182   // Weak, only valid (and set) while InMacroArgs is true.
183   Token* ArgMacro;
184 
185   SourceLocation DATELoc, TIMELoc;
186 
187   // Next __COUNTER__ value, starts at 0.
188   unsigned CounterValue = 0;
189 
190   enum {
191     /// Maximum depth of \#includes.
192     MaxAllowedIncludeStackDepth = 200
193   };
194 
195   // State that is set before the preprocessor begins.
196   bool KeepComments : 1;
197   bool KeepMacroComments : 1;
198   bool SuppressIncludeNotFoundError : 1;
199 
200   // State that changes while the preprocessor runs:
201   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
202 
203   /// Whether the preprocessor owns the header search object.
204   bool OwnsHeaderSearch : 1;
205 
206   /// True if macro expansion is disabled.
207   bool DisableMacroExpansion : 1;
208 
209   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
210   /// when parsing preprocessor directives.
211   bool MacroExpansionInDirectivesOverride : 1;
212 
213   class ResetMacroExpansionHelper;
214 
215   /// Whether we have already loaded macros from the external source.
216   mutable bool ReadMacrosFromExternalSource : 1;
217 
218   /// True if pragmas are enabled.
219   bool PragmasEnabled : 1;
220 
221   /// True if the current build action is a preprocessing action.
222   bool PreprocessedOutput : 1;
223 
  /// True if we are currently preprocessing a \#if or \#elif directive.
225   bool ParsingIfOrElifDirective;
226 
227   /// True if we are pre-expanding macro arguments.
228   bool InMacroArgPreExpansion;
229 
230   /// Mapping/lookup information for all identifiers in
231   /// the program, including program keywords.
232   mutable IdentifierTable Identifiers;
233 
234   /// This table contains all the selectors in the program.
235   ///
236   /// Unlike IdentifierTable above, this table *isn't* populated by the
237   /// preprocessor. It is declared/expanded here because its role/lifetime is
  /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put it here.
240   ///
241   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
242   /// the lifetime of the preprocessor.
243   SelectorTable Selectors;
244 
245   /// Information about builtins.
246   std::unique_ptr<Builtin::Context> BuiltinInfo;
247 
248   /// Tracks all of the pragmas that the client registered
249   /// with this preprocessor.
250   std::unique_ptr<PragmaNamespace> PragmaHandlers;
251 
  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
254   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
255 
256   /// Tracks all of the comment handlers that the client registered
257   /// with this preprocessor.
258   std::vector<CommentHandler *> CommentHandlers;
259 
260   /// Empty line handler.
261   EmptylineHandler *Emptyline = nullptr;
262 
  /// True if we want to ignore the EOF token and continue later on (thus
  /// avoiding tearing down the Lexer, etc.).
265   bool IncrementalProcessing = false;
266 
267   /// The kind of translation unit we are processing.
268   TranslationUnitKind TUKind;
269 
270   /// The code-completion handler.
271   CodeCompletionHandler *CodeComplete = nullptr;
272 
273   /// The file that we're performing code-completion for, if any.
274   const FileEntry *CodeCompletionFile = nullptr;
275 
276   /// The offset in file for the code-completion point.
277   unsigned CodeCompletionOffset = 0;
278 
279   /// The location for the code-completion point. This gets instantiated
280   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
281   SourceLocation CodeCompletionLoc;
282 
283   /// The start location for the file of the code-completion point.
284   ///
285   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
286   /// for preprocessing.
287   SourceLocation CodeCompletionFileLoc;
288 
289   /// The source location of the \c import contextual keyword we just
290   /// lexed, if any.
291   SourceLocation ModuleImportLoc;
292 
293   /// The module import path that we're currently processing.
294   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
295 
296   /// Whether the last token we lexed was an '@'.
297   bool LastTokenWasAt = false;
298 
299   /// A position within a C++20 import-seq.
300   class ImportSeq {
301   public:
302     enum State : int {
303       // Positive values represent a number of unclosed brackets.
304       AtTopLevel = 0,
305       AfterTopLevelTokenSeq = -1,
306       AfterExport = -2,
307       AfterImportSeq = -3,
308     };
309 
ImportSeq(State S)310     ImportSeq(State S) : S(S) {}
311 
312     /// Saw any kind of open bracket.
handleOpenBracket()313     void handleOpenBracket() {
314       S = static_cast<State>(std::max<int>(S, 0) + 1);
315     }
316     /// Saw any kind of close bracket other than '}'.
handleCloseBracket()317     void handleCloseBracket() {
318       S = static_cast<State>(std::max<int>(S, 1) - 1);
319     }
320     /// Saw a close brace.
handleCloseBrace()321     void handleCloseBrace() {
322       handleCloseBracket();
323       if (S == AtTopLevel && !AfterHeaderName)
324         S = AfterTopLevelTokenSeq;
325     }
326     /// Saw a semicolon.
handleSemi()327     void handleSemi() {
328       if (atTopLevel()) {
329         S = AfterTopLevelTokenSeq;
330         AfterHeaderName = false;
331       }
332     }
333 
334     /// Saw an 'export' identifier.
handleExport()335     void handleExport() {
336       if (S == AfterTopLevelTokenSeq)
337         S = AfterExport;
338       else if (S <= 0)
339         S = AtTopLevel;
340     }
341     /// Saw an 'import' identifier.
handleImport()342     void handleImport() {
343       if (S == AfterTopLevelTokenSeq || S == AfterExport)
344         S = AfterImportSeq;
345       else if (S <= 0)
346         S = AtTopLevel;
347     }
348 
349     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
350     /// until we reach a top-level semicolon.
handleHeaderName()351     void handleHeaderName() {
352       if (S == AfterImportSeq)
353         AfterHeaderName = true;
354       handleMisc();
355     }
356 
357     /// Saw any other token.
handleMisc()358     void handleMisc() {
359       if (S <= 0)
360         S = AtTopLevel;
361     }
362 
atTopLevel()363     bool atTopLevel() { return S <= 0; }
afterImportSeq()364     bool afterImportSeq() { return S == AfterImportSeq; }
365 
366   private:
367     State S;
368     /// Whether we're in the pp-import-suffix following the header-name in a
369     /// pp-import. If so, a close-brace is not sufficient to end the
370     /// top-level-token-seq of an import-seq.
371     bool AfterHeaderName = false;
372   };
373 
374   /// Our current position within a C++20 import-seq.
375   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
376 
377   /// Whether the module import expects an identifier next. Otherwise,
378   /// it expects a '.' or ';'.
379   bool ModuleImportExpectsIdentifier = false;
380 
381   /// The identifier and source location of the currently-active
382   /// \#pragma clang arc_cf_code_audited begin.
383   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
384 
385   /// The source location of the currently-active
386   /// \#pragma clang assume_nonnull begin.
387   SourceLocation PragmaAssumeNonNullLoc;
388 
389   /// True if we hit the code-completion point.
390   bool CodeCompletionReached = false;
391 
392   /// The code completion token containing the information
393   /// on the stem that is to be code completed.
394   IdentifierInfo *CodeCompletionII = nullptr;
395 
396   /// Range for the code completion token.
397   SourceRange CodeCompletionTokenRange;
398 
399   /// The directory that the main file should be considered to occupy,
400   /// if it does not correspond to a real file (as happens when building a
401   /// module).
402   const DirectoryEntry *MainFileDir = nullptr;
403 
404   /// The number of bytes that we will initially skip when entering the
405   /// main file, along with a flag that indicates whether skipping this number
406   /// of bytes will place the lexer at the start of a line.
407   ///
408   /// This is used when loading a precompiled preamble.
409   std::pair<int, bool> SkipMainFilePreamble;
410 
  /// Whether we hit an error due to reaching the max allowed include depth.
  /// This lets us avoid hitting the same error over and over again.
413   bool HasReachedMaxIncludeDepth = false;
414 
415   /// The number of currently-active calls to Lex.
416   ///
417   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
418   /// require asking for multiple additional tokens. This counter makes it
419   /// possible for Lex to detect whether it's producing a token for the end
420   /// of phase 4 of translation or for some other situation.
421   unsigned LexLevel = 0;
422 
423   /// The number of (LexLevel 0) preprocessor tokens.
424   unsigned TokenCount = 0;
425 
426   /// Preprocess every token regardless of LexLevel.
427   bool PreprocessToken = false;
428 
429   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
430   /// warning, or zero for unlimited.
431   unsigned MaxTokens = 0;
432   SourceLocation MaxTokensOverrideLoc;
433 
434 public:
435   struct PreambleSkipInfo {
436     SourceLocation HashTokenLoc;
437     SourceLocation IfTokenLoc;
438     bool FoundNonSkipPortion;
439     bool FoundElse;
440     SourceLocation ElseLoc;
441 
PreambleSkipInfoPreambleSkipInfo442     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
443                      bool FoundNonSkipPortion, bool FoundElse,
444                      SourceLocation ElseLoc)
445         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
446           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
447           ElseLoc(ElseLoc) {}
448   };
449 
450 private:
451   friend class ASTReader;
452   friend class MacroArgs;
453 
454   class PreambleConditionalStackStore {
455     enum State {
456       Off = 0,
457       Recording = 1,
458       Replaying = 2,
459     };
460 
461   public:
462     PreambleConditionalStackStore() = default;
463 
startRecording()464     void startRecording() { ConditionalStackState = Recording; }
startReplaying()465     void startReplaying() { ConditionalStackState = Replaying; }
isRecording()466     bool isRecording() const { return ConditionalStackState == Recording; }
isReplaying()467     bool isReplaying() const { return ConditionalStackState == Replaying; }
468 
getStack()469     ArrayRef<PPConditionalInfo> getStack() const {
470       return ConditionalStack;
471     }
472 
doneReplaying()473     void doneReplaying() {
474       ConditionalStack.clear();
475       ConditionalStackState = Off;
476     }
477 
setStack(ArrayRef<PPConditionalInfo> s)478     void setStack(ArrayRef<PPConditionalInfo> s) {
479       if (!isRecording() && !isReplaying())
480         return;
481       ConditionalStack.clear();
482       ConditionalStack.append(s.begin(), s.end());
483     }
484 
hasRecordedPreamble()485     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
486 
reachedEOFWhileSkipping()487     bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
488 
clearSkipInfo()489     void clearSkipInfo() { SkipInfo.reset(); }
490 
491     llvm::Optional<PreambleSkipInfo> SkipInfo;
492 
493   private:
494     SmallVector<PPConditionalInfo, 4> ConditionalStack;
495     State ConditionalStackState = Off;
496   } PreambleConditionalStack;
497 
498   /// The current top of the stack that we're lexing from if
499   /// not expanding a macro and we are lexing directly from source code.
500   ///
501   /// Only one of CurLexer, or CurTokenLexer will be non-null.
502   std::unique_ptr<Lexer> CurLexer;
503 
  /// The current top of the stack that we're lexing from
  /// if not expanding a macro.
506   ///
507   /// This is an alias for CurLexer.
508   PreprocessorLexer *CurPPLexer = nullptr;
509 
510   /// Used to find the current FileEntry, if CurLexer is non-null
511   /// and if applicable.
512   ///
513   /// This allows us to implement \#include_next and find directory-specific
514   /// properties.
515   const DirectoryLookup *CurDirLookup = nullptr;
516 
517   /// The current macro we are expanding, if we are expanding a macro.
518   ///
519   /// One of CurLexer and CurTokenLexer must be null.
520   std::unique_ptr<TokenLexer> CurTokenLexer;
521 
522   /// The kind of lexer we're currently working with.
523   enum CurLexerKind {
524     CLK_Lexer,
525     CLK_TokenLexer,
526     CLK_CachingLexer,
527     CLK_LexAfterModuleImport
528   } CurLexerKind = CLK_Lexer;
529 
530   /// If the current lexer is for a submodule that is being built, this
531   /// is that submodule.
532   Module *CurLexerSubmodule = nullptr;
533 
534   /// Keeps track of the stack of files currently
535   /// \#included, and macros currently being expanded from, not counting
536   /// CurLexer/CurTokenLexer.
537   struct IncludeStackInfo {
538     enum CurLexerKind           CurLexerKind;
539     Module                     *TheSubmodule;
540     std::unique_ptr<Lexer>      TheLexer;
541     PreprocessorLexer          *ThePPLexer;
542     std::unique_ptr<TokenLexer> TheTokenLexer;
543     const DirectoryLookup      *TheDirLookup;
544 
545     // The following constructors are completely useless copies of the default
546     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo547     IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
548                      std::unique_ptr<Lexer> &&TheLexer,
549                      PreprocessorLexer *ThePPLexer,
550                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
551                      const DirectoryLookup *TheDirLookup)
552         : CurLexerKind(std::move(CurLexerKind)),
553           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
554           ThePPLexer(std::move(ThePPLexer)),
555           TheTokenLexer(std::move(TheTokenLexer)),
556           TheDirLookup(std::move(TheDirLookup)) {}
557   };
558   std::vector<IncludeStackInfo> IncludeMacroStack;
559 
560   /// Actions invoked when some preprocessor activity is
561   /// encountered (e.g. a file is \#included, etc).
562   std::unique_ptr<PPCallbacks> Callbacks;
563 
564   struct MacroExpandsInfo {
565     Token Tok;
566     MacroDefinition MD;
567     SourceRange Range;
568 
MacroExpandsInfoMacroExpandsInfo569     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
570         : Tok(Tok), MD(MD), Range(Range) {}
571   };
572   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
573 
574   /// Information about a name that has been used to define a module macro.
575   struct ModuleMacroInfo {
576     /// The most recent macro directive for this identifier.
577     MacroDirective *MD;
578 
579     /// The active module macros for this identifier.
580     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
581 
582     /// The generation number at which we last updated ActiveModuleMacros.
583     /// \see Preprocessor::VisibleModules.
584     unsigned ActiveModuleMacrosGeneration = 0;
585 
586     /// Whether this macro name is ambiguous.
587     bool IsAmbiguous = false;
588 
589     /// The module macros that are overridden by this macro.
590     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
591 
ModuleMacroInfoModuleMacroInfo592     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
593   };
594 
595   /// The state of a macro for an identifier.
596   class MacroState {
597     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
598 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)599     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
600                                    const IdentifierInfo *II) const {
601       if (II->isOutOfDate())
602         PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
603       // FIXME: Find a spare bit on IdentifierInfo and store a
604       //        HasModuleMacros flag.
605       if (!II->hasMacroDefinition() ||
606           (!PP.getLangOpts().Modules &&
607            !PP.getLangOpts().ModulesLocalVisibility) ||
608           !PP.CurSubmoduleState->VisibleModules.getGeneration())
609         return nullptr;
610 
611       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
612       if (!Info) {
613         Info = new (PP.getPreprocessorAllocator())
614             ModuleMacroInfo(State.get<MacroDirective *>());
615         State = Info;
616       }
617 
618       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
619           Info->ActiveModuleMacrosGeneration)
620         PP.updateModuleMacroInfo(II, *Info);
621       return Info;
622     }
623 
624   public:
MacroState()625     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)626     MacroState(MacroDirective *MD) : State(MD) {}
627 
MacroState(MacroState && O)628     MacroState(MacroState &&O) noexcept : State(O.State) {
629       O.State = (MacroDirective *)nullptr;
630     }
631 
632     MacroState &operator=(MacroState &&O) noexcept {
633       auto S = O.State;
634       O.State = (MacroDirective *)nullptr;
635       State = S;
636       return *this;
637     }
638 
~MacroState()639     ~MacroState() {
640       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
641         Info->~ModuleMacroInfo();
642     }
643 
getLatest()644     MacroDirective *getLatest() const {
645       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
646         return Info->MD;
647       return State.get<MacroDirective*>();
648     }
649 
setLatest(MacroDirective * MD)650     void setLatest(MacroDirective *MD) {
651       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
652         Info->MD = MD;
653       else
654         State = MD;
655     }
656 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)657     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
658       auto *Info = getModuleInfo(PP, II);
659       return Info ? Info->IsAmbiguous : false;
660     }
661 
662     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)663     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
664       if (auto *Info = getModuleInfo(PP, II))
665         return Info->ActiveModuleMacros;
666       return None;
667     }
668 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)669     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
670                                                SourceManager &SourceMgr) const {
671       // FIXME: Incorporate module macros into the result of this.
672       if (auto *Latest = getLatest())
673         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
674       return {};
675     }
676 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)677     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
678       if (auto *Info = getModuleInfo(PP, II)) {
679         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
680                                       Info->ActiveModuleMacros.begin(),
681                                       Info->ActiveModuleMacros.end());
682         Info->ActiveModuleMacros.clear();
683         Info->IsAmbiguous = false;
684       }
685     }
686 
getOverriddenMacros()687     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
688       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
689         return Info->OverriddenMacros;
690       return None;
691     }
692 
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)693     void setOverriddenMacros(Preprocessor &PP,
694                              ArrayRef<ModuleMacro *> Overrides) {
695       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
696       if (!Info) {
697         if (Overrides.empty())
698           return;
699         Info = new (PP.getPreprocessorAllocator())
700             ModuleMacroInfo(State.get<MacroDirective *>());
701         State = Info;
702       }
703       Info->OverriddenMacros.clear();
704       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
705                                     Overrides.begin(), Overrides.end());
706       Info->ActiveModuleMacrosGeneration = 0;
707     }
708   };
709 
710   /// For each IdentifierInfo that was associated with a macro, we
711   /// keep a mapping to the history of all macro definitions and #undefs in
712   /// the reverse order (the latest one is in the head of the list).
713   ///
714   /// This mapping lives within the \p CurSubmoduleState.
715   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
716 
717   struct SubmoduleState;
718 
719   /// Information about a submodule that we're currently building.
720   struct BuildingSubmoduleInfo {
721     /// The module that we are building.
722     Module *M;
723 
724     /// The location at which the module was included.
725     SourceLocation ImportLoc;
726 
727     /// Whether we entered this submodule via a pragma.
728     bool IsPragma;
729 
730     /// The previous SubmoduleState.
731     SubmoduleState *OuterSubmoduleState;
732 
733     /// The number of pending module macro names when we started building this.
734     unsigned OuterPendingModuleMacroNames;
735 
BuildingSubmoduleInfoBuildingSubmoduleInfo736     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
737                           SubmoduleState *OuterSubmoduleState,
738                           unsigned OuterPendingModuleMacroNames)
739         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
740           OuterSubmoduleState(OuterSubmoduleState),
741           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
742   };
743   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
744 
745   /// Information about a submodule's preprocessor state.
746   struct SubmoduleState {
747     /// The macros for the submodule.
748     MacroMap Macros;
749 
750     /// The set of modules that are visible within the submodule.
751     VisibleModuleSet VisibleModules;
752 
753     // FIXME: CounterValue?
754     // FIXME: PragmaPushMacroInfo?
755   };
756   std::map<Module *, SubmoduleState> Submodules;
757 
758   /// The preprocessor state for preprocessing outside of any submodule.
759   SubmoduleState NullSubmoduleState;
760 
761   /// The current submodule state. Will be \p NullSubmoduleState if we're not
762   /// in a submodule.
763   SubmoduleState *CurSubmoduleState;
764 
765   /// The set of known macros exported from modules.
766   llvm::FoldingSet<ModuleMacro> ModuleMacros;
767 
768   /// The names of potential module macros that we've not yet processed.
769   llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
770 
771   /// The list of module macros, for each identifier, that are not overridden by
772   /// any other module macro.
773   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
774       LeafModuleMacros;
775 
  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
778   ///
779   /// We store just their SourceLocations instead of
780   /// something like MacroInfo*. The benefit of this is that when we are
781   /// deserializing from PCH, we don't need to deserialize identifier & macros
782   /// just so that we can report that they are unused, we just warn using
783   /// the SourceLocations of this set (that will be filled by the ASTReader).
784   /// We are using SmallPtrSet instead of a vector for faster removal.
785   using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
786   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
787 
788   /// A "freelist" of MacroArg objects that can be
789   /// reused for quick allocation.
790   MacroArgs *MacroArgCache = nullptr;
791 
792   /// For each IdentifierInfo used in a \#pragma push_macro directive,
793   /// we keep a MacroInfo stack used to restore the previous macro value.
794   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
795       PragmaPushMacroInfo;
796 
  // Various statistics we track for performance analysis. Every counter is
  // initialized to zero here; NumDirectives is the one exposed through
  // getNumDirectives().
  unsigned NumDirectives = 0;
  unsigned NumDefined = 0;
  unsigned NumUndefined = 0;
  unsigned NumPragma = 0;
  unsigned NumIf = 0;
  unsigned NumElse = 0;
  unsigned NumEndif = 0;
  unsigned NumEnteredSourceFiles = 0;
  unsigned MaxIncludeStackDepth = 0;
  unsigned NumMacroExpanded = 0;
  unsigned NumFnMacroExpanded = 0;
  unsigned NumBuiltinMacroExpanded = 0;
  unsigned NumFastMacroExpanded = 0;
  unsigned NumTokenPaste = 0;
  unsigned NumFastTokenPaste = 0;
  unsigned NumSkipped = 0;
814 
  /// The predefined macros that the preprocessor should use from the
  /// command line etc.
  ///
  /// Read through getPredefines() and replaced through setPredefines().
  std::string Predefines;

  /// The file ID for the preprocessor predefines; returned by
  /// getPredefinesFileID().
  FileID PredefinesFileID;

  /// The file ID for the PCH through header.
  FileID PCHThroughHeaderFileID;

  /// Whether tokens are being skipped until a \#pragma hdrstop is seen.
  bool SkippingUntilPragmaHdrStop = false;

  /// Whether tokens are being skipped until the through header is seen.
  bool SkippingUntilPCHThroughHeader = false;
830 
  /// \{
  /// Cache of macro expanders to reduce malloc traffic.
  ///
  /// The cache array holds at most TokenLexerCacheSize entries.
  /// NOTE(review): NumCachedTokenLexers has no in-class initializer, unlike
  /// the neighbouring members; presumably the constructor sets it -- confirm.
  enum { TokenLexerCacheSize = 8 };
  unsigned NumCachedTokenLexers;
  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
  /// \}

  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  SmallVector<Token, 16> MacroExpandedTokens;
  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;

  /// A record of the macro definitions and expansions that
  /// occurred during preprocessing.
  ///
  /// This is an optional side structure that can be enabled with
  /// \c createPreprocessingRecord() prior to preprocessing. It stays null
  /// until then and is what getPreprocessingRecord() returns.
  PreprocessingRecord *Record = nullptr;
852 
  /// Cached tokens state.
  using CachedTokensTy = SmallVector<Token, 1>;

  /// Cached tokens are stored here when we do backtracking or
  /// lookahead. They are "lexed" by the CachingLex() method.
  ///
  /// LookAhead() reads from this vector directly when the requested token is
  /// already cached, and EnterToken() inserts into it at CachedLexPos.
  CachedTokensTy CachedTokens;

  /// The position of the cached token that CachingLex() should
  /// "lex" next.
  ///
  /// If it points beyond the CachedTokens vector, it means that a normal
  /// Lex() should be invoked. RevertCachedTokens() moves this position
  /// backwards.
  CachedTokensTy::size_type CachedLexPos = 0;

  /// Stack of backtrack positions, allowing nested backtracks.
  ///
  /// The EnableBacktrackAtThisPos() method pushes a position to
  /// indicate where CachedLexPos should be set when the BackTrack() method is
  /// invoked (at which point the last position is popped).
  /// Backtracking counts as enabled (see isBacktrackEnabled()) whenever this
  /// stack is non-empty.
  std::vector<CachedTokensTy::size_type> BacktrackPositions;
873 
  /// One node of the MacroInfo chain: a MacroInfo stored by value plus the
  /// link to the next node.
  struct MacroInfoChain {
    MacroInfo MI;
    MacroInfoChain *Next;
  };

  /// MacroInfos are managed as a chain for easy disposal.  This is the head
  /// of that list; it is null while the chain is empty.
  MacroInfoChain *MIChainHead = nullptr;
882 
  /// Bring an out-of-date identifier up to date.
  ///
  /// getLeafModuleMacros() calls this when \c II.isOutOfDate() is true.
  /// NOTE(review): defined out of line; presumably it defers to the external
  /// preprocessor source -- confirm against the definition.
  void updateOutOfDateIdentifier(IdentifierInfo &II) const;
884 
885 public:
  /// Construct a preprocessor from its options, diagnostics engine, language
  /// options, source manager, header search and module loader.
  ///
  /// NOTE(review): the exact meaning of \p IILookup, \p OwnsHeaderSearch and
  /// \p TUKind is established by the out-of-line definition; presumably
  /// \p OwnsHeaderSearch makes the preprocessor responsible for destroying
  /// \p Headers -- confirm.
  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
               DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
               HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
               IdentifierInfoLookup *IILookup = nullptr,
               bool OwnsHeaderSearch = false,
               TranslationUnitKind TUKind = TU_Complete);

  ~Preprocessor();
894 
  /// Initialize the preprocessor using information about the target.
  ///
  /// \param Target is owned by the caller and must remain valid for the
  /// lifetime of the preprocessor.
  /// \param AuxTarget is owned by the caller and must remain valid for
  /// the lifetime of the preprocessor. It may be null (the default).
  void Initialize(const TargetInfo &Target,
                  const TargetInfo *AuxTarget = nullptr);

  /// Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
  void InitializeForModelFile();

  /// Clean up after model file parsing.
  void FinalizeForModelFile();
914 
915   /// Retrieve the preprocessor options used to initialize this
916   /// preprocessor.
getPreprocessorOpts()917   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
918 
getDiagnostics()919   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
setDiagnostics(DiagnosticsEngine & D)920   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
921 
getLangOpts()922   const LangOptions &getLangOpts() const { return LangOpts; }
getTargetInfo()923   const TargetInfo &getTargetInfo() const { return *Target; }
getAuxTargetInfo()924   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
getFileManager()925   FileManager &getFileManager() const { return FileMgr; }
getSourceManager()926   SourceManager &getSourceManager() const { return SourceMgr; }
getHeaderSearchInfo()927   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
928 
getIdentifierTable()929   IdentifierTable &getIdentifierTable() { return Identifiers; }
getIdentifierTable()930   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
getSelectorTable()931   SelectorTable &getSelectorTable() { return Selectors; }
getBuiltinInfo()932   Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
getPreprocessorAllocator()933   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
934 
setExternalSource(ExternalPreprocessorSource * Source)935   void setExternalSource(ExternalPreprocessorSource *Source) {
936     ExternalSource = Source;
937   }
938 
getExternalSource()939   ExternalPreprocessorSource *getExternalSource() const {
940     return ExternalSource;
941   }
942 
943   /// Retrieve the module loader associated with this preprocessor.
getModuleLoader()944   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
945 
hadModuleLoaderFatalFailure()946   bool hadModuleLoaderFatalFailure() const {
947     return TheModuleLoader.HadFatalFailure;
948   }
949 
950   /// Retrieve the number of Directives that have been processed by the
951   /// Preprocessor.
getNumDirectives()952   unsigned getNumDirectives() const {
953     return NumDirectives;
954   }
955 
956   /// True if we are currently preprocessing a #if or #elif directive
isParsingIfOrElifDirective()957   bool isParsingIfOrElifDirective() const {
958     return ParsingIfOrElifDirective;
959   }
960 
961   /// Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)962   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
963     this->KeepComments = KeepComments | KeepMacroComments;
964     this->KeepMacroComments = KeepMacroComments;
965   }
966 
getCommentRetentionState()967   bool getCommentRetentionState() const { return KeepComments; }
968 
setPragmasEnabled(bool Enabled)969   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
getPragmasEnabled()970   bool getPragmasEnabled() const { return PragmasEnabled; }
971 
SetSuppressIncludeNotFoundError(bool Suppress)972   void SetSuppressIncludeNotFoundError(bool Suppress) {
973     SuppressIncludeNotFoundError = Suppress;
974   }
975 
GetSuppressIncludeNotFoundError()976   bool GetSuppressIncludeNotFoundError() {
977     return SuppressIncludeNotFoundError;
978   }
979 
980   /// Sets whether the preprocessor is responsible for producing output or if
981   /// it is producing tokens to be consumed by Parse and Sema.
setPreprocessedOutput(bool IsPreprocessedOutput)982   void setPreprocessedOutput(bool IsPreprocessedOutput) {
983     PreprocessedOutput = IsPreprocessedOutput;
984   }
985 
986   /// Returns true if the preprocessor is responsible for generating output,
987   /// false if it is producing tokens to be consumed by Parse and Sema.
isPreprocessedOutput()988   bool isPreprocessedOutput() const { return PreprocessedOutput; }
989 
990   /// Return true if we are lexing directly from the specified lexer.
isCurrentLexer(const PreprocessorLexer * L)991   bool isCurrentLexer(const PreprocessorLexer *L) const {
992     return CurPPLexer == L;
993   }
994 
995   /// Return the current lexer being lexed from.
996   ///
997   /// Note that this ignores any potentially active macro expansions and _Pragma
998   /// expansions going on at the time.
getCurrentLexer()999   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1000 
1001   /// Return the current file lexer being lexed from.
1002   ///
1003   /// Note that this ignores any potentially active macro expansions and _Pragma
1004   /// expansions going on at the time.
1005   PreprocessorLexer *getCurrentFileLexer() const;
1006 
1007   /// Return the submodule owning the file being lexed. This may not be
1008   /// the current module if we have changed modules since entering the file.
getCurrentLexerSubmodule()1009   Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1010 
1011   /// Returns the FileID for the preprocessor predefines.
getPredefinesFileID()1012   FileID getPredefinesFileID() const { return PredefinesFileID; }
1013 
1014   /// \{
1015   /// Accessors for preprocessor callbacks.
1016   ///
1017   /// Note that this class takes ownership of any PPCallbacks object given to
1018   /// it.
getPPCallbacks()1019   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)1020   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1021     if (Callbacks)
1022       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1023                                                 std::move(Callbacks));
1024     Callbacks = std::move(C);
1025   }
1026   /// \}
1027 
1028   /// Get the number of tokens processed so far.
getTokenCount()1029   unsigned getTokenCount() const { return TokenCount; }
1030 
1031   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
getMaxTokens()1032   unsigned getMaxTokens() const { return MaxTokens; }
1033 
overrideMaxTokens(unsigned Value,SourceLocation Loc)1034   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1035     MaxTokens = Value;
1036     MaxTokensOverrideLoc = Loc;
1037   };
1038 
getMaxTokensOverrideLoc()1039   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1040 
1041   /// Register a function that would be called on each token in the final
1042   /// expanded token stream.
1043   /// This also reports annotation tokens produced by the parser.
setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1044   void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1045     OnToken = std::move(F);
1046   }
1047 
setPreprocessToken(bool Preprocess)1048   void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1049 
isMacroDefined(StringRef Id)1050   bool isMacroDefined(StringRef Id) {
1051     return isMacroDefined(&Identifiers.get(Id));
1052   }
isMacroDefined(const IdentifierInfo * II)1053   bool isMacroDefined(const IdentifierInfo *II) {
1054     return II->hasMacroDefinition() &&
1055            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1056   }
1057 
1058   /// Determine whether II is defined as a macro within the module M,
1059   /// if that is a module that we've already preprocessed. Does not check for
1060   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1061   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1062     if (!II->hasMacroDefinition())
1063       return false;
1064     auto I = Submodules.find(M);
1065     if (I == Submodules.end())
1066       return false;
1067     auto J = I->second.Macros.find(II);
1068     if (J == I->second.Macros.end())
1069       return false;
1070     auto *MD = J->second.getLatest();
1071     return MD && MD->isDefined();
1072   }
1073 
getMacroDefinition(const IdentifierInfo * II)1074   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1075     if (!II->hasMacroDefinition())
1076       return {};
1077 
1078     MacroState &S = CurSubmoduleState->Macros[II];
1079     auto *MD = S.getLatest();
1080     while (MD && isa<VisibilityMacroDirective>(MD))
1081       MD = MD->getPrevious();
1082     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1083                            S.getActiveModuleMacros(*this, II),
1084                            S.isAmbiguous(*this, II));
1085   }
1086 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1087   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1088                                           SourceLocation Loc) {
1089     if (!II->hadMacroDefinition())
1090       return {};
1091 
1092     MacroState &S = CurSubmoduleState->Macros[II];
1093     MacroDirective::DefInfo DI;
1094     if (auto *MD = S.getLatest())
1095       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1096     // FIXME: Compute the set of active module macros at the specified location.
1097     return MacroDefinition(DI.getDirective(),
1098                            S.getActiveModuleMacros(*this, II),
1099                            S.isAmbiguous(*this, II));
1100   }
1101 
1102   /// Given an identifier, return its latest non-imported MacroDirective
1103   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)1104   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1105     if (!II->hasMacroDefinition())
1106       return nullptr;
1107 
1108     auto *MD = getLocalMacroDirectiveHistory(II);
1109     if (!MD || MD->getDefinition().isUndefined())
1110       return nullptr;
1111 
1112     return MD;
1113   }
1114 
getMacroInfo(const IdentifierInfo * II)1115   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1116     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1117   }
1118 
getMacroInfo(const IdentifierInfo * II)1119   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1120     if (!II->hasMacroDefinition())
1121       return nullptr;
1122     if (auto MD = getMacroDefinition(II))
1123       return MD.getMacroInfo();
1124     return nullptr;
1125   }
1126 
  /// Given an identifier, return the latest non-imported macro
  /// directive for that identifier.
  ///
  /// One can iterate over all previous macro directives from the most recent
  /// one. The result may be null; getLocalMacroDirective() checks for that.
  MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;

  /// Add a directive to the macro directive history for this identifier.
  ///
  /// Both appendDefMacroDirective() overloads funnel through this method.
  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1136   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1137                                              SourceLocation Loc) {
1138     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1139     appendMacroDirective(II, MD);
1140     return MD;
1141   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1142   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1143                                              MacroInfo *MI) {
1144     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1145   }
1146 
  /// Set a MacroDirective that was loaded from a PCH file.
  ///
  /// NOTE(review): the roles of \p ED and \p MD are not visible from this
  /// declaration -- confirm against the definition.
  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
                               MacroDirective *MD);

  /// Register an exported macro for a module and identifier.
  ///
  /// \param IsNew output parameter. NOTE(review): presumably set to whether a
  /// new ModuleMacro was created rather than an existing one found -- confirm.
  ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
                              ArrayRef<ModuleMacro *> Overrides, bool &IsNew);

  /// Look up the module macro registered for \p Mod and \p II.
  ///
  /// NOTE(review): presumably returns null when none is registered -- confirm.
  ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1155 
1156   /// Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)1157   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1158     if (II->isOutOfDate())
1159       updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1160     auto I = LeafModuleMacros.find(II);
1161     if (I != LeafModuleMacros.end())
1162       return I->second;
1163     return None;
1164   }
1165 
1166   /// Get the list of submodules that we're currently building.
getBuildingSubmodules()1167   ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1168     return BuildingSubmoduleStack;
1169   }
1170 
1171   /// \{
1172   /// Iterators for the macro history table. Currently defined macros have
1173   /// IdentifierInfo::hasMacroDefinition() set and an empty
1174   /// MacroInfo::getUndefLoc() at the head of the list.
1175   using macro_iterator = MacroMap::const_iterator;
1176 
1177   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1178   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1179 
1180   llvm::iterator_range<macro_iterator>
1181   macros(bool IncludeExternalMacros = true) const {
1182     macro_iterator begin = macro_begin(IncludeExternalMacros);
1183     macro_iterator end = macro_end(IncludeExternalMacros);
1184     return llvm::make_range(begin, end);
1185   }
1186 
1187   /// \}
1188 
  /// Return the name of the macro defined before \p Loc that has
  /// spelling \p Tokens.  If there are multiple macros with the same
  /// spelling, return the last one defined.
  ///
  /// NOTE(review): presumably returns an empty StringRef when no macro
  /// matches -- confirm against the definition.
  StringRef getLastMacroWithSpelling(SourceLocation Loc,
                                     ArrayRef<TokenValue> Tokens) const;
1194 
getPredefines()1195   const std::string &getPredefines() const { return Predefines; }
1196 
1197   /// Set the predefines for this Preprocessor.
1198   ///
1199   /// These predefines are automatically injected when parsing the main file.
setPredefines(const char * P)1200   void setPredefines(const char *P) { Predefines = P; }
setPredefines(StringRef P)1201   void setPredefines(StringRef P) { Predefines = std::string(P); }
1202 
1203   /// Return information about the specified preprocessor
1204   /// identifier token.
getIdentifierInfo(StringRef Name)1205   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1206     return &Identifiers.get(Name);
1207   }
1208 
1209   /// Add the specified pragma handler to this preprocessor.
1210   ///
1211   /// If \p Namespace is non-null, then it is a token required to exist on the
1212   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1213   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
AddPragmaHandler(PragmaHandler * Handler)1214   void AddPragmaHandler(PragmaHandler *Handler) {
1215     AddPragmaHandler(StringRef(), Handler);
1216   }
1217 
1218   /// Remove the specific pragma handler from this preprocessor.
1219   ///
1220   /// If \p Namespace is non-null, then it should be the namespace that
1221   /// \p Handler was added to. It is an error to remove a handler that
1222   /// has not been registered.
1223   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
RemovePragmaHandler(PragmaHandler * Handler)1224   void RemovePragmaHandler(PragmaHandler *Handler) {
1225     RemovePragmaHandler(StringRef(), Handler);
1226   }
1227 
1228   /// Install empty handlers for all pragmas (making them ignored).
1229   void IgnorePragmas();
1230 
1231   /// Set empty line handler.
setEmptylineHandler(EmptylineHandler * Handler)1232   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1233 
getEmptylineHandler()1234   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1235 
  /// Add the specified comment handler to the preprocessor.
  ///
  /// NOTE(review): ownership of \p Handler is not visible from this
  /// declaration; presumably it stays with the caller -- confirm.
  void addCommentHandler(CommentHandler *Handler);

  /// Remove the specified comment handler.
  ///
  /// It is an error to remove a handler that has not been registered.
  void removeCommentHandler(CommentHandler *Handler);
1243 
1244   /// Set the code completion handler to the given object.
setCodeCompletionHandler(CodeCompletionHandler & Handler)1245   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1246     CodeComplete = &Handler;
1247   }
1248 
1249   /// Retrieve the current code-completion handler.
getCodeCompletionHandler()1250   CodeCompletionHandler *getCodeCompletionHandler() const {
1251     return CodeComplete;
1252   }
1253 
1254   /// Clear out the code completion handler.
clearCodeCompletionHandler()1255   void clearCodeCompletionHandler() {
1256     CodeComplete = nullptr;
1257   }
1258 
  /// Hook used by the lexer to invoke the "included file" code
  /// completion point.
  ///
  /// NOTE(review): presumably \p Dir is the directory part typed so far and
  /// \p IsAngled distinguishes the angle-bracket from the quoted include
  /// form -- confirm against the definition.
  void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);

  /// Hook used by the lexer to invoke the "natural language" code
  /// completion point.
  void CodeCompleteNaturalLanguage();
1266 
1267   /// Set the code completion token for filtering purposes.
setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1268   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1269     CodeCompletionII = Filter;
1270   }
1271 
1272   /// Set the code completion token range for detecting replacement range later
1273   /// on.
setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1274   void setCodeCompletionTokenRange(const SourceLocation Start,
1275                                    const SourceLocation End) {
1276     CodeCompletionTokenRange = {Start, End};
1277   }
getCodeCompletionTokenRange()1278   SourceRange getCodeCompletionTokenRange() const {
1279     return CodeCompletionTokenRange;
1280   }
1281 
1282   /// Get the code completion token for filtering purposes.
getCodeCompletionFilter()1283   StringRef getCodeCompletionFilter() {
1284     if (CodeCompletionII)
1285       return CodeCompletionII->getName();
1286     return {};
1287   }
1288 
1289   /// Retrieve the preprocessing record, or NULL if there is no
1290   /// preprocessing record.
getPreprocessingRecord()1291   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1292 
  /// Create a new preprocessing record, which will keep track of
  /// all macro expansions, macro definitions, etc.
  void createPreprocessingRecord();

  /// Returns true if the FileEntry is the PCH through header.
  bool isPCHThroughHeader(const FileEntry *FE);

  /// True if creating a PCH with a through header.
  bool creatingPCHWithThroughHeader();

  /// True if using a PCH with a through header.
  bool usingPCHWithThroughHeader();

  /// True if creating a PCH with a \#pragma hdrstop.
  bool creatingPCHWithPragmaHdrStop();

  /// True if using a PCH with a \#pragma hdrstop.
  bool usingPCHWithPragmaHdrStop();

  /// Skip tokens until after the \#include of the through header or
  /// until after a \#pragma hdrstop.
  void SkipTokensWhileUsingPCH();

  /// Process directives while skipping until the through header or
  /// \#pragma hdrstop is found.
  ///
  /// NOTE(review): presumably \p HashLoc is the location of the '#' that
  /// introduced the directive -- confirm against the definition.
  void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
                                           SourceLocation HashLoc);
1320 
  /// Enter the specified FileID as the main source file,
  /// which implicitly adds the builtin defines etc.
  void EnterMainSourceFile();

  /// Inform the preprocessor callbacks that processing is complete.
  void EndSourceFile();

  /// Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// Emits a diagnostic, doesn't enter the file, and returns true on error.
  ///
  /// \returns true on error, false when the file was entered.
  bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
                       SourceLocation Loc);

  /// Add a Macro to the top of the include stack and start lexing
  /// tokens from it instead of the current buffer.
  ///
  /// \param Args specifies the tokens input to a function-like macro.
  /// \param ILEnd specifies the location of the ')' for a function-like macro
  /// or the identifier for an object-like macro.
  void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
                  MacroArgs *Args);
1343 
1344 private:
  /// Add a "macro" context to the top of the include stack,
  /// which will cause the lexer to start returning the specified tokens.
  ///
  /// This is the private workhorse behind the two public EnterTokenStream
  /// overloads, which fix \p OwnsTokens to true (unique_ptr overload) or
  /// false (ArrayRef overload).
  ///
  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
  /// will not be subject to further macro expansion. Otherwise, these tokens
  /// will be re-macro-expanded when/if expansion is enabled.
  ///
  /// If \p OwnsTokens is false, this method assumes that the specified stream
  /// of tokens has a permanent owner somewhere, so they do not need to be
  /// copied. If it is true, it assumes the array of tokens is allocated with
  /// \c new[] and the Preprocessor will delete[] it.
  ///
  /// If \p IsReinject is true, the resulting tokens will have the
  /// Token::IsReinjected flag set; see the flag documentation for details.
  void EnterTokenStream(const Token *Toks, unsigned NumToks,
                        bool DisableMacroExpansion, bool OwnsTokens,
                        bool IsReinject);
1362 
1363 public:
EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1364   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1365                         bool DisableMacroExpansion, bool IsReinject) {
1366     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1367                      IsReinject);
1368   }
1369 
EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1370   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1371                         bool IsReinject) {
1372     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1373                      IsReinject);
1374   }
1375 
  /// Pop the current lexer/macro exp off the top of the lexer stack.
  ///
  /// This should only be used in situations where the current state of the
  /// top-of-stack lexer is known.
  void RemoveTopOfLexerStack();

  /// From the point that this method is called, and until
  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
  /// make the Preprocessor re-lex the same tokens.
  ///
  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
  ///
  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
  /// tokens will continue indefinitely.
  ///
  /// Use isBacktrackEnabled() to query whether a backtrack position is
  /// currently active.
  void EnableBacktrackAtThisPos();

  /// Disable the last EnableBacktrackAtThisPos call.
  void CommitBacktrackedTokens();

  /// Make Preprocessor re-lex the tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  void Backtrack();
1403 
1404   /// True if EnableBacktrackAtThisPos() was called and
1405   /// caching of tokens is on.
isBacktrackEnabled()1406   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1407 
  /// Lex the next token for this preprocessor.
  void Lex(Token &Result);

  /// Lex a token, forming a header-name token if possible.
  ///
  /// NOTE(review): the meaning of the bool result is not visible from this
  /// declaration -- confirm against the definition.
  bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);

  /// NOTE(review): undocumented in this header; judging by the name, lexes
  /// the tokens that follow a module import -- confirm against the definition.
  bool LexAfterModuleImport(Token &Result);

  /// NOTE(review): undocumented in this header; judging by the name, collects
  /// the remaining tokens of a pp-import into \p Toks -- confirm.
  void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);

  /// NOTE(review): undocumented in this header; judging by the name, marks
  /// module \p M visible as of \p Loc (compare getModuleImportLoc()) --
  /// confirm against the definition.
  void makeModuleVisible(Module *M, SourceLocation Loc);
1418 
getModuleImportLoc(Module * M)1419   SourceLocation getModuleImportLoc(Module *M) const {
1420     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1421   }
1422 
  /// Lex a string literal, which may be the concatenation of multiple
  /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1426   bool LexStringLiteral(Token &Result, std::string &String,
1427                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1428     if (AllowMacroExpansion)
1429       Lex(Result);
1430     else
1431       LexUnexpandedToken(Result);
1432     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1433                                   AllowMacroExpansion);
1434   }
1435 
  /// Complete the lexing of a string literal where the first token has
  /// already been lexed (see LexStringLiteral).
  ///
  /// LexStringLiteral() returns this method's result unchanged, so the same
  /// convention applies: true on success, false if an error diagnostic has
  /// been generated.
  bool FinishLexStringLiteral(Token &Result, std::string &String,
                              const char *DiagnosticTag,
                              bool AllowMacroExpansion);
1441 
1442   /// Lex a token.  If it's a comment, keep lexing until we get
1443   /// something not a comment.
1444   ///
1445   /// This is useful in -E -C mode where comments would foul up preprocessor
1446   /// directive handling.
LexNonComment(Token & Result)1447   void LexNonComment(Token &Result) {
1448     do
1449       Lex(Result);
1450     while (Result.getKind() == tok::comment);
1451   }
1452 
1453   /// Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1454   void LexUnexpandedToken(Token &Result) {
1455     // Disable macro expansion.
1456     bool OldVal = DisableMacroExpansion;
1457     DisableMacroExpansion = true;
1458     // Lex the token.
1459     Lex(Result);
1460 
1461     // Reenable it.
1462     DisableMacroExpansion = OldVal;
1463   }
1464 
1465   /// Like LexNonComment, but this disables macro expansion of
1466   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1467   void LexUnexpandedNonComment(Token &Result) {
1468     do
1469       LexUnexpandedToken(Result);
1470     while (Result.getKind() == tok::comment);
1471   }
1472 
  /// Parses a simple integer literal to get its numeric value.  Floating
  /// point literals and user defined literals are rejected.  Used primarily to
  /// handle pragmas that accept integer arguments.
  ///
  /// \param Value receives the parsed value.
  /// NOTE(review): presumably returns true on success and false when the
  /// literal is rejected -- confirm against the definition.
  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1477 
1478   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1479   void SetMacroExpansionOnlyInDirectives() {
1480     DisableMacroExpansion = true;
1481     MacroExpansionInDirectivesOverride = true;
1482   }
1483 
1484   /// Peeks ahead N tokens and returns that token without consuming any
1485   /// tokens.
1486   ///
1487   /// LookAhead(0) returns the next token that would be returned by Lex(),
1488   /// LookAhead(1) returns the token after it, etc.  This returns normal
1489   /// tokens after phase 5.  As such, it is equivalent to using
1490   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1491   const Token &LookAhead(unsigned N) {
1492     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1493     if (CachedLexPos + N < CachedTokens.size())
1494       return CachedTokens[CachedLexPos+N];
1495     else
1496       return PeekAhead(N+1);
1497   }
1498 
1499   /// When backtracking is enabled and tokens are cached,
1500   /// this allows to revert a specific number of tokens.
1501   ///
1502   /// Note that the number of tokens being reverted should be up to the last
1503   /// backtrack position, not more.
RevertCachedTokens(unsigned N)1504   void RevertCachedTokens(unsigned N) {
1505     assert(isBacktrackEnabled() &&
1506            "Should only be called when tokens are cached for backtracking");
1507     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1508          && "Should revert tokens up to the last backtrack position, not more");
1509     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1510            "Corrupted backtrack positions ?");
1511     CachedLexPos -= N;
1512   }
1513 
1514   /// Enters a token in the token stream to be lexed next.
1515   ///
1516   /// If BackTrack() is called afterwards, the token will remain at the
1517   /// insertion point.
1518   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1519   /// flag set. See the flag documentation for details.
EnterToken(const Token & Tok,bool IsReinject)1520   void EnterToken(const Token &Tok, bool IsReinject) {
1521     if (LexLevel) {
1522       // It's not correct in general to enter caching lex mode while in the
1523       // middle of a nested lexing action.
1524       auto TokCopy = std::make_unique<Token[]>(1);
1525       TokCopy[0] = Tok;
1526       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1527     } else {
1528       EnterCachingLexMode();
1529       assert(IsReinject && "new tokens in the middle of cached stream");
1530       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1531     }
1532   }
1533 
1534   /// We notify the Preprocessor that if it is caching tokens (because
1535   /// backtrack is enabled) it should replace the most recent cached tokens
1536   /// with the given annotation token. This function has no effect if
1537   /// backtracking is not enabled.
1538   ///
1539   /// Note that the use of this function is just for optimization, so that the
1540   /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1541   /// invoked.
AnnotateCachedTokens(const Token & Tok)1542   void AnnotateCachedTokens(const Token &Tok) {
1543     assert(Tok.isAnnotation() && "Expected annotation token");
1544     if (CachedLexPos != 0 && isBacktrackEnabled())
1545       AnnotatePreviousCachedTokens(Tok);
1546   }
1547 
1548   /// Get the location of the last cached token, suitable for setting the end
1549   /// location of an annotation token.
getLastCachedTokenLocation()1550   SourceLocation getLastCachedTokenLocation() const {
1551     assert(CachedLexPos != 0);
1552     return CachedTokens[CachedLexPos-1].getLastLoc();
1553   }
1554 
1555   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1556   /// CachedTokens.
1557   bool IsPreviousCachedToken(const Token &Tok) const;
1558 
1559   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1560   /// in \p NewToks.
1561   ///
1562   /// Useful when a token needs to be split into smaller ones and the most
1563   /// recent token in CachedTokens must be updated to reflect that.
1564   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1565 
1566   /// Replace the last token with an annotation token.
1567   ///
1568   /// Like AnnotateCachedTokens(), this routine replaces an
1569   /// already-parsed (and resolved) token with an annotation
1570   /// token. However, this routine only replaces the last token with
1571   /// the annotation token; it does not affect any other cached
1572   /// tokens. This function has no effect if backtracking is not
1573   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1574   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1575     assert(Tok.isAnnotation() && "Expected annotation token");
1576     if (CachedLexPos != 0 && isBacktrackEnabled())
1577       CachedTokens[CachedLexPos-1] = Tok;
1578   }
1579 
1580   /// Enter an annotation token into the token stream.
1581   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1582                             void *AnnotationVal);
1583 
1584   /// Determine whether it's possible for a future call to Lex to produce an
1585   /// annotation token created by a previous call to EnterAnnotationToken.
mightHavePendingAnnotationTokens()1586   bool mightHavePendingAnnotationTokens() {
1587     return CurLexerKind != CLK_Lexer;
1588   }
1589 
1590   /// Update the current token to represent the provided
1591   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1592   void TypoCorrectToken(const Token &Tok) {
1593     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1594     if (CachedLexPos != 0 && isBacktrackEnabled())
1595       CachedTokens[CachedLexPos-1] = Tok;
1596   }
1597 
1598   /// Recompute the current lexer kind based on the CurLexer/
1599   /// CurTokenLexer pointers.
1600   void recomputeCurLexerKind();
1601 
1602   /// Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1603   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1604 
1605   /// Enables the incremental processing
1606   void enableIncrementalProcessing(bool value = true) {
1607     IncrementalProcessing = value;
1608   }
1609 
1610   /// Specify the point at which code-completion will be performed.
1611   ///
1612   /// \param File the file in which code completion should occur. If
1613   /// this file is included multiple times, code-completion will
1614   /// perform completion the first time it is included. If NULL, this
1615   /// function clears out the code-completion point.
1616   ///
1617   /// \param Line the line at which code completion should occur
1618   /// (1-based).
1619   ///
1620   /// \param Column the column at which code completion should occur
1621   /// (1-based).
1622   ///
1623   /// \returns true if an error occurred, false otherwise.
1624   bool SetCodeCompletionPoint(const FileEntry *File,
1625                               unsigned Line, unsigned Column);
1626 
1627   /// Determine if we are performing code completion.
isCodeCompletionEnabled()1628   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1629 
1630   /// Returns the location of the code-completion point.
1631   ///
1632   /// Returns an invalid location if code-completion is not enabled or the file
1633   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()1634   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1635 
1636   /// Returns the start location of the file of code-completion point.
1637   ///
1638   /// Returns an invalid location if code-completion is not enabled or the file
1639   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()1640   SourceLocation getCodeCompletionFileLoc() const {
1641     return CodeCompletionFileLoc;
1642   }
1643 
1644   /// Returns true if code-completion is enabled and we have hit the
1645   /// code-completion point.
isCodeCompletionReached()1646   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1647 
1648   /// Note that we hit the code-completion point.
setCodeCompletionReached()1649   void setCodeCompletionReached() {
1650     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1651     CodeCompletionReached = true;
1652     // Silence any diagnostics that occur after we hit the code-completion.
1653     getDiagnostics().setSuppressAllDiagnostics(true);
1654   }
1655 
1656   /// The location of the currently-active \#pragma clang
1657   /// arc_cf_code_audited begin.
1658   ///
1659   /// Returns an invalid location if there is no such pragma active.
1660   std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo()1661   getPragmaARCCFCodeAuditedInfo() const {
1662     return PragmaARCCFCodeAuditedInfo;
1663   }
1664 
1665   /// Set the location of the currently-active \#pragma clang
1666   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1667   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1668                                      SourceLocation Loc) {
1669     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1670   }
1671 
1672   /// The location of the currently-active \#pragma clang
1673   /// assume_nonnull begin.
1674   ///
1675   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()1676   SourceLocation getPragmaAssumeNonNullLoc() const {
1677     return PragmaAssumeNonNullLoc;
1678   }
1679 
1680   /// Set the location of the currently-active \#pragma clang
1681   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)1682   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1683     PragmaAssumeNonNullLoc = Loc;
1684   }
1685 
1686   /// Set the directory in which the main file should be considered
1687   /// to have been found, if it is not a real file.
setMainFileDir(const DirectoryEntry * Dir)1688   void setMainFileDir(const DirectoryEntry *Dir) {
1689     MainFileDir = Dir;
1690   }
1691 
1692   /// Instruct the preprocessor to skip part of the main source file.
1693   ///
1694   /// \param Bytes The number of bytes in the preamble to skip.
1695   ///
1696   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1697   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1698   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1699     SkipMainFilePreamble.first = Bytes;
1700     SkipMainFilePreamble.second = StartOfLine;
1701   }
1702 
1703   /// Forwarding function for diagnostics.  This emits a diagnostic at
1704   /// the specified Token's location, translating the token's start
1705   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)1706   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1707     return Diags->Report(Loc, DiagID);
1708   }
1709 
Diag(const Token & Tok,unsigned DiagID)1710   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1711     return Diags->Report(Tok.getLocation(), DiagID);
1712   }
1713 
1714   /// Return the 'spelling' of the token at the given
1715   /// location; does not go up to the spelling location or down to the
1716   /// expansion location.
1717   ///
1718   /// \param buffer A buffer which will be used only if the token requires
1719   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1720   /// \param invalid If non-null, will be set \c true if an error occurs.
1721   StringRef getSpelling(SourceLocation loc,
1722                         SmallVectorImpl<char> &buffer,
1723                         bool *invalid = nullptr) const {
1724     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1725   }
1726 
1727   /// Return the 'spelling' of the Tok token.
1728   ///
1729   /// The spelling of a token is the characters used to represent the token in
1730   /// the source file after trigraph expansion and escaped-newline folding.  In
1731   /// particular, this wants to get the true, uncanonicalized, spelling of
1732   /// things like digraphs, UCNs, etc.
1733   ///
1734   /// \param Invalid If non-null, will be set \c true if an error occurs.
1735   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1736     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1737   }
1738 
1739   /// Get the spelling of a token into a preallocated buffer, instead
1740   /// of as an std::string.
1741   ///
1742   /// The caller is required to allocate enough space for the token, which is
1743   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1744   /// actual result is returned.
1745   ///
1746   /// Note that this method may do two possible things: it may either fill in
1747   /// the buffer specified with characters, or it may *change the input pointer*
1748   /// to point to a constant buffer with the data already in it (avoiding a
1749   /// copy).  The caller is not allowed to modify the returned buffer pointer
1750   /// if an internal buffer is returned.
1751   unsigned getSpelling(const Token &Tok, const char *&Buffer,
1752                        bool *Invalid = nullptr) const {
1753     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1754   }
1755 
1756   /// Get the spelling of a token into a SmallVector.
1757   ///
1758   /// Note that the returned StringRef may not point to the
1759   /// supplied buffer if a copy can be avoided.
1760   StringRef getSpelling(const Token &Tok,
1761                         SmallVectorImpl<char> &Buffer,
1762                         bool *Invalid = nullptr) const;
1763 
1764   /// Relex the token at the specified location.
1765   /// \returns true if there was a failure, false on success.
1766   bool getRawToken(SourceLocation Loc, Token &Result,
1767                    bool IgnoreWhiteSpace = false) {
1768     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1769   }
1770 
1771   /// Given a Token \p Tok that is a numeric constant with length 1,
1772   /// return the character.
1773   char
1774   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1775                                               bool *Invalid = nullptr) const {
1776     assert(Tok.is(tok::numeric_constant) &&
1777            Tok.getLength() == 1 && "Called on unsupported token");
1778     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1779 
1780     // If the token is carrying a literal data pointer, just use it.
1781     if (const char *D = Tok.getLiteralData())
1782       return *D;
1783 
1784     // Otherwise, fall back on getCharacterData, which is slower, but always
1785     // works.
1786     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1787   }
1788 
1789   /// Retrieve the name of the immediate macro expansion.
1790   ///
1791   /// This routine starts from a source location, and finds the name of the
1792   /// macro responsible for its immediate expansion. It looks through any
1793   /// intervening macro argument expansions to compute this. It returns a
1794   /// StringRef that refers to the SourceManager-owned buffer of the source
1795   /// where that macro name is spelled. Thus, the result shouldn't out-live
1796   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)1797   StringRef getImmediateMacroName(SourceLocation Loc) {
1798     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1799   }
1800 
1801   /// Plop the specified string into a scratch buffer and set the
1802   /// specified token's location and length to it.
1803   ///
1804   /// If specified, the source location provides a location of the expansion
1805   /// point of the token.
1806   void CreateString(StringRef Str, Token &Tok,
1807                     SourceLocation ExpansionLocStart = SourceLocation(),
1808                     SourceLocation ExpansionLocEnd = SourceLocation());
1809 
1810   /// Split the first Length characters out of the token starting at TokLoc
1811   /// and return a location pointing to the split token. Re-lexing from the
1812   /// split token will return the split token rather than the original.
1813   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1814 
1815   /// Computes the source location just past the end of the
1816   /// token at this source location.
1817   ///
1818   /// This routine can be used to produce a source location that
1819   /// points just past the end of the token referenced by \p Loc, and
1820   /// is generally used when a diagnostic needs to point just after a
1821   /// token where it expected something different that it received. If
1822   /// the returned source location would not be meaningful (e.g., if
1823   /// it points into a macro), this routine returns an invalid
1824   /// source location.
1825   ///
1826   /// \param Offset an offset from the end of the token, where the source
1827   /// location should refer to. The default offset (0) produces a source
1828   /// location pointing just past the end of the token; an offset of 1 produces
1829   /// a source location pointing to the last character in the token, etc.
1830   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1831     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1832   }
1833 
1834   /// Returns true if the given MacroID location points at the first
1835   /// token of the macro expansion.
1836   ///
1837   /// \param MacroBegin If non-null and function returns true, it is set to
1838   /// begin location of the macro.
1839   bool isAtStartOfMacroExpansion(SourceLocation loc,
1840                                  SourceLocation *MacroBegin = nullptr) const {
1841     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1842                                             MacroBegin);
1843   }
1844 
1845   /// Returns true if the given MacroID location points at the last
1846   /// token of the macro expansion.
1847   ///
1848   /// \param MacroEnd If non-null and function returns true, it is set to
1849   /// end location of the macro.
1850   bool isAtEndOfMacroExpansion(SourceLocation loc,
1851                                SourceLocation *MacroEnd = nullptr) const {
1852     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1853   }
1854 
1855   /// Print the token to stderr, used for debugging.
1856   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1857   void DumpLocation(SourceLocation Loc) const;
1858   void DumpMacro(const MacroInfo &MI) const;
1859   void dumpMacroInfo(const IdentifierInfo *II);
1860 
1861   /// Given a location that specifies the start of a
1862   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1863   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1864                                          unsigned Char) const {
1865     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1866   }
1867 
1868   /// Increment the counters for the number of token paste operations
1869   /// performed.
1870   ///
1871   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)1872   void IncrementPasteCounter(bool isFast) {
1873     if (isFast)
1874       ++NumFastTokenPaste;
1875     else
1876       ++NumTokenPaste;
1877   }
1878 
1879   void PrintStats();
1880 
1881   size_t getTotalMemory() const;
1882 
1883   /// When the macro expander pastes together a comment (/##/) in Microsoft
1884   /// mode, this method handles updating the current state, returning the
1885   /// token on the next source line.
1886   void HandleMicrosoftCommentPaste(Token &Tok);
1887 
1888   //===--------------------------------------------------------------------===//
1889   // Preprocessor callback methods.  These are invoked by a lexer as various
1890   // directives and events are found.
1891 
1892   /// Given a tok::raw_identifier token, look up the
1893   /// identifier information for the token and install it into the token,
1894   /// updating the token kind accordingly.
1895   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1896 
1897 private:
1898   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1899 
1900 public:
1901   /// Specifies the reason for poisoning an identifier.
1902   ///
1903   /// If that identifier is accessed while poisoned, then this reason will be
1904   /// used instead of the default "poisoned" diagnostic.
1905   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1906 
1907   /// Display reason for poisoned identifier.
1908   void HandlePoisonedIdentifier(Token & Identifier);
1909 
MaybeHandlePoisonedIdentifier(Token & Identifier)1910   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1911     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1912       if(II->isPoisoned()) {
1913         HandlePoisonedIdentifier(Identifier);
1914       }
1915     }
1916   }
1917 
1918 private:
1919   /// Identifiers used for SEH handling in Borland. These are only
1920   /// allowed in particular circumstances
1921   // __except block
1922   IdentifierInfo *Ident__exception_code,
1923                  *Ident___exception_code,
1924                  *Ident_GetExceptionCode;
1925   // __except filter expression
1926   IdentifierInfo *Ident__exception_info,
1927                  *Ident___exception_info,
1928                  *Ident_GetExceptionInfo;
1929   // __finally
1930   IdentifierInfo *Ident__abnormal_termination,
1931                  *Ident___abnormal_termination,
1932                  *Ident_AbnormalTermination;
1933 
1934   const char *getCurLexerEndPos();
1935   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1936 
1937 public:
1938   void PoisonSEHIdentifiers(bool Poison = true); // Borland
1939 
1940   /// Callback invoked when the lexer reads an identifier and has
1941   /// filled in the tokens IdentifierInfo member.
1942   ///
1943   /// This callback potentially macro expands it or turns it into a named
1944   /// token (like 'for').
1945   ///
1946   /// \returns true if we actually computed a token, false if we need to
1947   /// lex again.
1948   bool HandleIdentifier(Token &Identifier);
1949 
1950   /// Callback invoked when the lexer hits the end of the current file.
1951   ///
1952   /// This either returns the EOF token and returns true, or
1953   /// pops a level off the include stack and returns false, at which point the
1954   /// client should call lex again.
1955   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1956 
1957   /// Callback invoked when the current TokenLexer hits the end of its
1958   /// token stream.
1959   bool HandleEndOfTokenLexer(Token &Result);
1960 
1961   /// Callback invoked when the lexer sees a # token at the start of a
1962   /// line.
1963   ///
1964   /// This consumes the directive, modifies the lexer/preprocessor state, and
1965   /// advances the lexer(s) so that the next token read is the correct one.
1966   void HandleDirective(Token &Result);
1967 
1968   /// Ensure that the next token is a tok::eod token.
1969   ///
1970   /// If not, emit a diagnostic and consume up until the eod.
1971   /// If \p EnableMacros is true, then we consider macros that expand to zero
1972   /// tokens as being ok.
1973   ///
1974   /// \return The location of the end of the directive (the terminating
1975   /// newline).
1976   SourceLocation CheckEndOfDirective(const char *DirType,
1977                                      bool EnableMacros = false);
1978 
1979   /// Read and discard all tokens remaining on the current line until
1980   /// the tok::eod token is found. Returns the range of the skipped tokens.
1981   SourceRange DiscardUntilEndOfDirective();
1982 
1983   /// Returns true if the preprocessor has seen a use of
1984   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()1985   bool SawDateOrTime() const {
1986     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1987   }
getCounterValue()1988   unsigned getCounterValue() const { return CounterValue; }
setCounterValue(unsigned V)1989   void setCounterValue(unsigned V) { CounterValue = V; }
1990 
1991   /// Retrieves the module that we're currently building, if any.
1992   Module *getCurrentModule();
1993 
1994   /// Allocate a new MacroInfo object with the provided SourceLocation.
1995   MacroInfo *AllocateMacroInfo(SourceLocation L);
1996 
1997   /// Turn the specified lexer token into a fully checked and spelled
1998   /// filename, e.g. as an operand of \#include.
1999   ///
2000   /// The caller is expected to provide a buffer that is large enough to hold
2001   /// the spelling of the filename, but is also expected to handle the case
2002   /// when this method decides to use a different buffer.
2003   ///
2004   /// \returns true if the input filename was in <>'s or false if it was
2005   /// in ""'s.
2006   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2007 
2008   /// Given a "foo" or \<foo> reference, look up the indicated file.
2009   ///
2010   /// Returns None on failure.  \p isAngled indicates whether the file
2011   /// reference is for system \#include's or not (i.e. using <> instead of "").
2012   Optional<FileEntryRef>
2013   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2014              const DirectoryLookup *FromDir, const FileEntry *FromFile,
2015              const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
2016              SmallVectorImpl<char> *RelativePath,
2017              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2018              bool *IsFrameworkFound, bool SkipCache = false);
2019 
2020   /// Get the DirectoryLookup structure used to find the current
2021   /// FileEntry, if CurLexer is non-null and if applicable.
2022   ///
2023   /// This allows us to implement \#include_next and find directory-specific
2024   /// properties.
GetCurDirLookup()2025   const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
2026 
2027   /// Return true if we're in the top-level file, not in a \#include.
2028   bool isInPrimaryFile() const;
2029 
2030   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2031   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
2032   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2033 
2034   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2035                       bool *ShadowFlag = nullptr);
2036 
2037   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2038   Module *LeaveSubmodule(bool ForPragma);
2039 
2040 private:
2041   friend void TokenLexer::ExpandFunctionArguments();
2042 
PushIncludeMacroStack()2043   void PushIncludeMacroStack() {
2044     assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
2045     IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
2046                                    std::move(CurLexer), CurPPLexer,
2047                                    std::move(CurTokenLexer), CurDirLookup);
2048     CurPPLexer = nullptr;
2049   }
2050 
PopIncludeMacroStack()2051   void PopIncludeMacroStack() {
2052     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2053     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2054     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2055     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2056     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2057     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
2058     IncludeMacroStack.pop_back();
2059   }
2060 
2061   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2062 
2063   /// Determine whether we need to create module macros for #defines in the
2064   /// current context.
2065   bool needModuleMacros() const;
2066 
2067   /// Update the set of active module macros and ambiguity flag for a module
2068   /// macro name.
2069   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2070 
2071   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2072                                                SourceLocation Loc);
2073   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2074   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2075                                                              bool isPublic);
2076 
2077   /// Lex and validate a macro name, which occurs after a
2078   /// \#define or \#undef.
2079   ///
2080   /// \param MacroNameTok Token that represents the name defined or undefined.
2081   /// \param IsDefineUndef Kind of preprocessor directive.
2082   /// \param ShadowFlag Points to flag that is set if macro name shadows
2083   ///                   a keyword.
2084   ///
2085   /// This emits a diagnostic, sets the token kind to eod,
2086   /// and discards the rest of the macro line if the macro name is invalid.
2087   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2088                      bool *ShadowFlag = nullptr);
2089 
2090   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2091   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2092   /// doing so performs certain validity checks including (but not limited to):
2093   ///   - # (stringization) is followed by a macro parameter
2094   /// \param MacroNameTok - Token that represents the macro name
2095   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2096   ///
2097   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2098   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2099   MacroInfo *ReadOptionalMacroParameterListAndBody(
2100       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2101 
2102   /// The ( starting an argument list of a macro definition has just been read.
2103   /// Lex the rest of the parameters and the closing ), updating \p MI with
2104   /// what we learn and saving in \p LastTok the last token read.
2105   /// Return true if an error occurs parsing the arg list.
2106   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2107 
2108   /// We just read a \#if or related directive and decided that the
2109   /// subsequent tokens are in the \#if'd out portion of the
2110   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2111   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2112   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2113   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2114   /// already seen one so a \#else directive is a duplicate.  When this returns,
2115   /// the caller can lex the first valid token.
2116   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2117                                     SourceLocation IfTokenLoc,
2118                                     bool FoundNonSkipPortion, bool FoundElse,
2119                                     SourceLocation ElseLoc = SourceLocation());
2120 
2121   /// Information about the result for evaluating an expression for a
2122   /// preprocessor directive.
2123   struct DirectiveEvalResult {
2124     /// Whether the expression was evaluated as true or not.
2125     bool Conditional;
2126 
2127     /// True if the expression contained identifiers that were undefined.
2128     bool IncludedUndefinedIds;
2129 
2130     /// The source range for the expression.
2131     SourceRange ExprRange;
2132   };
2133 
2134   /// Evaluate an integer constant expression that may occur after a
2135   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2136   ///
2137   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2138   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2139 
2140   /// Install the standard preprocessor pragmas:
2141   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2142   void RegisterBuiltinPragmas();
2143 
2144   /// Register builtin macros such as __LINE__ with the identifier table.
2145   void RegisterBuiltinMacros();
2146 
  /// If an identifier token is read that is to be expanded as a macro, handle
  /// it and return the next token in \p Identifier.  If we lexed a token,
  /// return true; otherwise the caller should lex again.
2150   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2151 
  /// Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
2157   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2158                                   ArrayRef<Token> tokens);
2159 
2160   void removeCachedMacroExpandedTokensOfLastLexer();
2161 
2162   /// Determine whether the next preprocessor token to be
2163   /// lexed is a '('.  If so, consume the token and return true, if not, this
2164   /// method should have no observable side-effect on the lexed tokens.
2165   bool isNextPPTokenLParen();
2166 
  /// After reading "MACRO(", this method is invoked to read all of the actual
  /// arguments specified for the macro invocation.  Returns null on error.
2169   MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2170                                        SourceLocation &MacroEnd);
2171 
2172   /// If an identifier token is read that is to be expanded
2173   /// as a builtin macro, handle it and return the next token as 'Tok'.
2174   void ExpandBuiltinMacro(Token &Tok);
2175 
2176   /// Read a \c _Pragma directive, slice it up, process it, then
2177   /// return the first token after the directive.
2178   /// This assumes that the \c _Pragma token has just been read into \p Tok.
2179   void Handle_Pragma(Token &Tok);
2180 
2181   /// Like Handle_Pragma except the pragma text is not enclosed within
2182   /// a string literal.
2183   void HandleMicrosoft__pragma(Token &Tok);
2184 
2185   /// Add a lexer to the top of the include stack and
2186   /// start lexing tokens from it instead of the current buffer.
2187   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2188 
2189   /// Set the FileID for the preprocessor predefines.
setPredefinesFileID(FileID FID)2190   void setPredefinesFileID(FileID FID) {
2191     assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2192     PredefinesFileID = FID;
2193   }
2194 
2195   /// Set the FileID for the PCH through header.
2196   void setPCHThroughHeaderFileID(FileID FID);
2197 
2198   /// Returns true if we are lexing from a file and not a
2199   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2200   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2201     return L ? !L->isPragmaLexer() : P != nullptr;
2202   }
2203 
IsFileLexer(const IncludeStackInfo & I)2204   static bool IsFileLexer(const IncludeStackInfo& I) {
2205     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2206   }
2207 
IsFileLexer()2208   bool IsFileLexer() const {
2209     return IsFileLexer(CurLexer.get(), CurPPLexer);
2210   }
2211 
2212   //===--------------------------------------------------------------------===//
2213   // Caching stuff.
2214   void CachingLex(Token &Result);
2215 
InCachingLexMode()2216   bool InCachingLexMode() const {
2217     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2218     // that we are past EOF, not that we are in CachingLex mode.
2219     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2220   }
2221 
2222   void EnterCachingLexMode();
2223   void EnterCachingLexModeUnchecked();
2224 
ExitCachingLexMode()2225   void ExitCachingLexMode() {
2226     if (InCachingLexMode())
2227       RemoveTopOfLexerStack();
2228   }
2229 
2230   const Token &PeekAhead(unsigned N);
2231   void AnnotatePreviousCachedTokens(const Token &Tok);
2232 
2233   //===--------------------------------------------------------------------===//
2234   /// Handle*Directive - implement the various preprocessor directives.  These
2235   /// should side-effect the current preprocessor object so that the next call
2236   /// to Lex() will return the appropriate token next.
2237   void HandleLineDirective();
2238   void HandleDigitDirective(Token &Tok);
2239   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2240   void HandleIdentSCCSDirective(Token &Tok);
2241   void HandleMacroPublicDirective(Token &Tok);
2242   void HandleMacroPrivateDirective();
2243 
2244   /// An additional notification that can be produced by a header inclusion or
2245   /// import to tell the parser what happened.
2246   struct ImportAction {
2247     enum ActionKind {
2248       None,
2249       ModuleBegin,
2250       ModuleImport,
2251       SkippedModuleImport,
2252       Failure,
2253     } Kind;
2254     Module *ModuleForHeader = nullptr;
2255 
2256     ImportAction(ActionKind AK, Module *Mod = nullptr)
KindImportAction2257         : Kind(AK), ModuleForHeader(Mod) {
2258       assert((AK == None || Mod || AK == Failure) &&
2259              "no module for module action");
2260     }
2261   };
2262 
2263   Optional<FileEntryRef> LookupHeaderIncludeOrImport(
2264       const DirectoryLookup *&CurDir, StringRef &Filename,
2265       SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2266       const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2267       bool &IsMapped, const DirectoryLookup *LookupFrom,
2268       const FileEntry *LookupFromFile, StringRef &LookupFilename,
2269       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2270       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2271 
2272   // File inclusion.
2273   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2274                               const DirectoryLookup *LookupFrom = nullptr,
2275                               const FileEntry *LookupFromFile = nullptr);
2276   ImportAction
2277   HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2278                               Token &FilenameTok, SourceLocation EndLoc,
2279                               const DirectoryLookup *LookupFrom = nullptr,
2280                               const FileEntry *LookupFromFile = nullptr);
2281   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2282   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2283   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2284   void HandleMicrosoftImportDirective(Token &Tok);
2285 
2286 public:
2287   /// Check that the given module is available, producing a diagnostic if not.
2288   /// \return \c true if the check failed (because the module is not available).
2289   ///         \c false if the module appears to be usable.
2290   static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2291                                      const TargetInfo &TargetInfo,
2292                                      DiagnosticsEngine &Diags, Module *M);
2293 
2294   // Module inclusion testing.
2295   /// Find the module that owns the source or header file that
2296   /// \p Loc points to. If the location is in a file that was included
2297   /// into a module, or is outside any module, returns nullptr.
2298   Module *getModuleForLocation(SourceLocation Loc);
2299 
2300   /// We want to produce a diagnostic at location IncLoc concerning an
2301   /// unreachable effect at location MLoc (eg, where a desired entity was
2302   /// declared or defined). Determine whether the right way to make MLoc
2303   /// reachable is by #include, and if so, what header should be included.
2304   ///
2305   /// This is not necessarily fast, and might load unexpected module maps, so
2306   /// should only be called by code that intends to produce an error.
2307   ///
2308   /// \param IncLoc The location at which the missing effect was detected.
2309   /// \param MLoc A location within an unimported module at which the desired
2310   ///        effect occurred.
2311   /// \return A file that can be #included to provide the desired effect. Null
2312   ///         if no such file could be determined or if a #include is not
2313   ///         appropriate (eg, if a module should be imported instead).
2314   const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2315                                                     SourceLocation MLoc);
2316 
isRecordingPreamble()2317   bool isRecordingPreamble() const {
2318     return PreambleConditionalStack.isRecording();
2319   }
2320 
hasRecordedPreamble()2321   bool hasRecordedPreamble() const {
2322     return PreambleConditionalStack.hasRecordedPreamble();
2323   }
2324 
getPreambleConditionalStack()2325   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2326       return PreambleConditionalStack.getStack();
2327   }
2328 
setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2329   void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2330     PreambleConditionalStack.setStack(s);
2331   }
2332 
setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,llvm::Optional<PreambleSkipInfo> SkipInfo)2333   void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2334                                              llvm::Optional<PreambleSkipInfo> SkipInfo) {
2335     PreambleConditionalStack.startReplaying();
2336     PreambleConditionalStack.setStack(s);
2337     PreambleConditionalStack.SkipInfo = SkipInfo;
2338   }
2339 
getPreambleSkipInfo()2340   llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2341     return PreambleConditionalStack.SkipInfo;
2342   }
2343 
2344 private:
  /// After processing the predefines file, initialize the conditional stack
  /// from the preamble.
2347   void replayPreambleConditionalStack();
2348 
2349   // Macro handling.
2350   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2351   void HandleUndefDirective();
2352 
2353   // Conditional Inclusion.
2354   void HandleIfdefDirective(Token &Result, const Token &HashToken,
2355                             bool isIfndef, bool ReadAnyTokensBeforeDirective);
2356   void HandleIfDirective(Token &IfToken, const Token &HashToken,
2357                          bool ReadAnyTokensBeforeDirective);
2358   void HandleEndifDirective(Token &EndifToken);
2359   void HandleElseDirective(Token &Result, const Token &HashToken);
2360   void HandleElifDirective(Token &ElifToken, const Token &HashToken);
2361 
2362   // Pragmas.
2363   void HandlePragmaDirective(PragmaIntroducer Introducer);
2364 
2365 public:
2366   void HandlePragmaOnce(Token &OnceTok);
2367   void HandlePragmaMark();
2368   void HandlePragmaPoison();
2369   void HandlePragmaSystemHeader(Token &SysHeaderTok);
2370   void HandlePragmaDependency(Token &DependencyTok);
2371   void HandlePragmaPushMacro(Token &Tok);
2372   void HandlePragmaPopMacro(Token &Tok);
2373   void HandlePragmaIncludeAlias(Token &Tok);
2374   void HandlePragmaModuleBuild(Token &Tok);
2375   void HandlePragmaHdrstop(Token &Tok);
2376   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2377 
2378   // Return true and store the first token only if any CommentHandler
2379   // has inserted some tokens and getCommentRetentionState() is false.
2380   bool HandleComment(Token &result, SourceRange Comment);
2381 
2382   /// A macro is used, update information about macros that need unused
2383   /// warnings.
2384   void markMacroAsUsed(MacroInfo *MI);
2385 
2386 private:
2387   Optional<unsigned>
2388   getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
2389 
2390   /// Contains the currently active skipped range mappings for skipping excluded
2391   /// conditional directives.
2392   ExcludedPreprocessorDirectiveSkipMapping
2393       *ExcludedConditionalDirectiveSkipMappings;
2394 };
2395 
2396 /// Abstract base class that describes a handler that will receive
2397 /// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  /// Virtual destructor; only declared here, its definition is not part of
  /// this class body.
  virtual ~CommentHandler();

  /// Callback that every concrete handler must implement.
  ///
  /// \param PP the preprocessor passed to the handler (presumably the one that
  ///        encountered the comment; confirm against the caller).
  /// \param Comment the source range of the comment.
  /// \return The handler shall return true if it has pushed any tokens
  ///         to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2406 
2407 /// Abstract base class that describes a handler that will receive
2408 /// source ranges for empty lines encountered in the source file.
class EmptylineHandler {
public:
  /// Virtual destructor; only declared here, its definition is not part of
  /// this class body.
  virtual ~EmptylineHandler();

  /// Callback that every concrete handler must implement; the handler handles
  /// empty lines.
  ///
  /// \param Range the source range handed to the handler (presumably the range
  ///        covering the empty lines; confirm against the caller).
  virtual void HandleEmptyline(SourceRange Range) = 0;
};
2416 
2417 /// Registry of pragma handlers added by plugins
2418 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2419 
2420 } // namespace clang
2421 
2422 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
2423