1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
16 
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/Module.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/HeaderSearch.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/Token.h"
33 #include "clang/Lex/TokenLexer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/FoldingSet.h"
37 #include "llvm/ADT/FunctionExtras.h"
38 #include "llvm/ADT/PointerUnion.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/ADT/TinyPtrVector.h"
44 #include "llvm/ADT/iterator_range.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Registry.h"
48 #include <cassert>
49 #include <cstddef>
50 #include <cstdint>
51 #include <map>
52 #include <memory>
53 #include <optional>
54 #include <string>
55 #include <utility>
56 #include <vector>
57 
58 namespace llvm {
59 
60 template<unsigned InternalLen> class SmallString;
61 
62 } // namespace llvm
63 
64 namespace clang {
65 
66 class CodeCompletionHandler;
67 class CommentHandler;
68 class DirectoryEntry;
69 class EmptylineHandler;
70 class ExternalPreprocessorSource;
71 class FileEntry;
72 class FileManager;
73 class HeaderSearch;
74 class MacroArgs;
75 class PragmaHandler;
76 class PragmaNamespace;
77 class PreprocessingRecord;
78 class PreprocessorLexer;
79 class PreprocessorOptions;
80 class ScratchBuffer;
81 class TargetInfo;
82 
83 namespace Builtin {
84 class Context;
85 }
86 
87 /// Stores token information for comparing actual tokens with
88 /// predefined values.  Only handles simple tokens and identifiers.
89 class TokenValue {
90   tok::TokenKind Kind;
91   IdentifierInfo *II;
92 
93 public:
TokenValue(tok::TokenKind Kind)94   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
95     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
96     assert(Kind != tok::identifier &&
97            "Identifiers should be created by TokenValue(IdentifierInfo *)");
98     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
99     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
100   }
101 
TokenValue(IdentifierInfo * II)102   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
103 
104   bool operator==(const Token &Tok) const {
105     return Tok.getKind() == Kind &&
106         (!II || II == Tok.getIdentifierInfo());
107   }
108 };
109 
/// The context in which a macro name is being used.
enum MacroUse {
  /// Any use other than in a \#define or \#undef.
  MU_Other = 0,

  /// The macro name specified in a \#define.
  MU_Define = 1,

  /// The macro name specified in an \#undef.
  MU_Undef = 2
};
121 
122 /// Engages in a tight little dance with the lexer to efficiently
123 /// preprocess tokens.
124 ///
125 /// Lexers know only about tokens within a single source file, and don't
126 /// know anything about preprocessor-level issues like the \#include stack,
127 /// token expansion, etc.
128 class Preprocessor {
129   friend class VAOptDefinitionContext;
130   friend class VariadicMacroScopeGuard;
131 
132   llvm::unique_function<void(const clang::Token &)> OnToken;
133   std::shared_ptr<PreprocessorOptions> PPOpts;
134   DiagnosticsEngine        *Diags;
135   const LangOptions &LangOpts;
136   const TargetInfo *Target = nullptr;
137   const TargetInfo *AuxTarget = nullptr;
138   FileManager       &FileMgr;
139   SourceManager     &SourceMgr;
140   std::unique_ptr<ScratchBuffer> ScratchBuf;
141   HeaderSearch      &HeaderInfo;
142   ModuleLoader      &TheModuleLoader;
143 
144   /// External source of macros.
145   ExternalPreprocessorSource *ExternalSource;
146 
147   /// A BumpPtrAllocator object used to quickly allocate and release
148   /// objects internal to the Preprocessor.
149   llvm::BumpPtrAllocator BP;
150 
151   /// Identifiers for builtin macros and other builtins.
152   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
153   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
154   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
155   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
156   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
157   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
158   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
159   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
160   IdentifierInfo *Ident__identifier;               // __identifier
161   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
162   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
163   IdentifierInfo *Ident__has_feature;              // __has_feature
164   IdentifierInfo *Ident__has_extension;            // __has_extension
165   IdentifierInfo *Ident__has_builtin;              // __has_builtin
166   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
167   IdentifierInfo *Ident__has_attribute;            // __has_attribute
168   IdentifierInfo *Ident__has_include;              // __has_include
169   IdentifierInfo *Ident__has_include_next;         // __has_include_next
170   IdentifierInfo *Ident__has_warning;              // __has_warning
171   IdentifierInfo *Ident__is_identifier;            // __is_identifier
172   IdentifierInfo *Ident__building_module;          // __building_module
173   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
174   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
175   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
176   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
177   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
178   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
179   IdentifierInfo *Ident__is_target_os;             // __is_target_os
180   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
181   IdentifierInfo *Ident__is_target_variant_os;
182   IdentifierInfo *Ident__is_target_variant_environment;
183   IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD
184 
185   // Weak, only valid (and set) while InMacroArgs is true.
186   Token* ArgMacro;
187 
188   SourceLocation DATELoc, TIMELoc;
189 
190   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
191   // not specified on the command line. The target is queried to set the
192   // default evaluation method.
193   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
194       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
195 
196   // The most recent pragma location where the floating point evaluation
197   // method was modified. This is used to determine whether the
  // 'pragma clang fp eval_method' was used within the current scope.
199   SourceLocation LastFPEvalPragmaLocation;
200 
201   LangOptions::FPEvalMethodKind TUFPEvalMethod =
202       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
203 
204   // Next __COUNTER__ value, starts at 0.
205   unsigned CounterValue = 0;
206 
207   enum {
208     /// Maximum depth of \#includes.
209     MaxAllowedIncludeStackDepth = 200
210   };
211 
212   // State that is set before the preprocessor begins.
213   bool KeepComments : 1;
214   bool KeepMacroComments : 1;
215   bool SuppressIncludeNotFoundError : 1;
216 
217   // State that changes while the preprocessor runs:
218   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
219 
220   /// Whether the preprocessor owns the header search object.
221   bool OwnsHeaderSearch : 1;
222 
223   /// True if macro expansion is disabled.
224   bool DisableMacroExpansion : 1;
225 
226   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
227   /// when parsing preprocessor directives.
228   bool MacroExpansionInDirectivesOverride : 1;
229 
230   class ResetMacroExpansionHelper;
231 
232   /// Whether we have already loaded macros from the external source.
233   mutable bool ReadMacrosFromExternalSource : 1;
234 
235   /// True if pragmas are enabled.
236   bool PragmasEnabled : 1;
237 
238   /// True if the current build action is a preprocessing action.
239   bool PreprocessedOutput : 1;
240 
241   /// True if we are currently preprocessing a #if or #elif directive
242   bool ParsingIfOrElifDirective;
243 
244   /// True if we are pre-expanding macro arguments.
245   bool InMacroArgPreExpansion;
246 
247   /// Mapping/lookup information for all identifiers in
248   /// the program, including program keywords.
249   mutable IdentifierTable Identifiers;
250 
251   /// This table contains all the selectors in the program.
252   ///
253   /// Unlike IdentifierTable above, this table *isn't* populated by the
254   /// preprocessor. It is declared/expanded here because its role/lifetime is
255   /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put here.
257   ///
258   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
259   /// the lifetime of the preprocessor.
260   SelectorTable Selectors;
261 
262   /// Information about builtins.
263   std::unique_ptr<Builtin::Context> BuiltinInfo;
264 
265   /// Tracks all of the pragmas that the client registered
266   /// with this preprocessor.
267   std::unique_ptr<PragmaNamespace> PragmaHandlers;
268 
  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
271   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
272 
273   /// Tracks all of the comment handlers that the client registered
274   /// with this preprocessor.
275   std::vector<CommentHandler *> CommentHandlers;
276 
277   /// Empty line handler.
278   EmptylineHandler *Emptyline = nullptr;
279 
280   /// True to avoid tearing down the lexer etc on EOF
281   bool IncrementalProcessing = false;
282 
283 public:
284   /// The kind of translation unit we are processing.
285   const TranslationUnitKind TUKind;
286 
287 private:
288   /// The code-completion handler.
289   CodeCompletionHandler *CodeComplete = nullptr;
290 
291   /// The file that we're performing code-completion for, if any.
292   const FileEntry *CodeCompletionFile = nullptr;
293 
294   /// The offset in file for the code-completion point.
295   unsigned CodeCompletionOffset = 0;
296 
297   /// The location for the code-completion point. This gets instantiated
298   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
299   SourceLocation CodeCompletionLoc;
300 
301   /// The start location for the file of the code-completion point.
302   ///
303   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
304   /// for preprocessing.
305   SourceLocation CodeCompletionFileLoc;
306 
307   /// The source location of the \c import contextual keyword we just
308   /// lexed, if any.
309   SourceLocation ModuleImportLoc;
310 
311   /// The import path for named module that we're currently processing.
312   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
313 
314   /// Whether the import is an `@import` or a standard c++ modules import.
315   bool IsAtImport = false;
316 
317   /// Whether the last token we lexed was an '@'.
318   bool LastTokenWasAt = false;
319 
320   /// A position within a C++20 import-seq.
321   class StdCXXImportSeq {
322   public:
323     enum State : int {
324       // Positive values represent a number of unclosed brackets.
325       AtTopLevel = 0,
326       AfterTopLevelTokenSeq = -1,
327       AfterExport = -2,
328       AfterImportSeq = -3,
329     };
330 
StdCXXImportSeq(State S)331     StdCXXImportSeq(State S) : S(S) {}
332 
333     /// Saw any kind of open bracket.
handleOpenBracket()334     void handleOpenBracket() {
335       S = static_cast<State>(std::max<int>(S, 0) + 1);
336     }
337     /// Saw any kind of close bracket other than '}'.
handleCloseBracket()338     void handleCloseBracket() {
339       S = static_cast<State>(std::max<int>(S, 1) - 1);
340     }
341     /// Saw a close brace.
handleCloseBrace()342     void handleCloseBrace() {
343       handleCloseBracket();
344       if (S == AtTopLevel && !AfterHeaderName)
345         S = AfterTopLevelTokenSeq;
346     }
347     /// Saw a semicolon.
handleSemi()348     void handleSemi() {
349       if (atTopLevel()) {
350         S = AfterTopLevelTokenSeq;
351         AfterHeaderName = false;
352       }
353     }
354 
355     /// Saw an 'export' identifier.
handleExport()356     void handleExport() {
357       if (S == AfterTopLevelTokenSeq)
358         S = AfterExport;
359       else if (S <= 0)
360         S = AtTopLevel;
361     }
362     /// Saw an 'import' identifier.
handleImport()363     void handleImport() {
364       if (S == AfterTopLevelTokenSeq || S == AfterExport)
365         S = AfterImportSeq;
366       else if (S <= 0)
367         S = AtTopLevel;
368     }
369 
370     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
371     /// until we reach a top-level semicolon.
handleHeaderName()372     void handleHeaderName() {
373       if (S == AfterImportSeq)
374         AfterHeaderName = true;
375       handleMisc();
376     }
377 
378     /// Saw any other token.
handleMisc()379     void handleMisc() {
380       if (S <= 0)
381         S = AtTopLevel;
382     }
383 
atTopLevel()384     bool atTopLevel() { return S <= 0; }
afterImportSeq()385     bool afterImportSeq() { return S == AfterImportSeq; }
afterTopLevelSeq()386     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
387 
388   private:
389     State S;
390     /// Whether we're in the pp-import-suffix following the header-name in a
391     /// pp-import. If so, a close-brace is not sufficient to end the
392     /// top-level-token-seq of an import-seq.
393     bool AfterHeaderName = false;
394   };
395 
396   /// Our current position within a C++20 import-seq.
397   StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
398 
399   /// Track whether we are in a Global Module Fragment
400   class TrackGMF {
401   public:
402     enum GMFState : int {
403       GMFActive = 1,
404       MaybeGMF = 0,
405       BeforeGMFIntroducer = -1,
406       GMFAbsentOrEnded = -2,
407     };
408 
TrackGMF(GMFState S)409     TrackGMF(GMFState S) : S(S) {}
410 
411     /// Saw a semicolon.
handleSemi()412     void handleSemi() {
413       // If it is immediately after the first instance of the module keyword,
414       // then that introduces the GMF.
415       if (S == MaybeGMF)
416         S = GMFActive;
417     }
418 
419     /// Saw an 'export' identifier.
handleExport()420     void handleExport() {
421       // The presence of an 'export' keyword always ends or excludes a GMF.
422       S = GMFAbsentOrEnded;
423     }
424 
425     /// Saw an 'import' identifier.
handleImport(bool AfterTopLevelTokenSeq)426     void handleImport(bool AfterTopLevelTokenSeq) {
427       // If we see this before any 'module' kw, then we have no GMF.
428       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
429         S = GMFAbsentOrEnded;
430     }
431 
432     /// Saw a 'module' identifier.
handleModule(bool AfterTopLevelTokenSeq)433     void handleModule(bool AfterTopLevelTokenSeq) {
434       // This was the first module identifier and not preceded by any token
435       // that would exclude a GMF.  It could begin a GMF, but only if directly
436       // followed by a semicolon.
437       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
438         S = MaybeGMF;
439       else
440         S = GMFAbsentOrEnded;
441     }
442 
443     /// Saw any other token.
handleMisc()444     void handleMisc() {
445       // We saw something other than ; after the 'module' kw, so not a GMF.
446       if (S == MaybeGMF)
447         S = GMFAbsentOrEnded;
448     }
449 
inGMF()450     bool inGMF() { return S == GMFActive; }
451 
452   private:
453     /// Track the transitions into and out of a Global Module Fragment,
454     /// if one is present.
455     GMFState S;
456   };
457 
458   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
459 
460   /// Track the status of the c++20 module decl.
461   ///
462   ///   module-declaration:
463   ///     'export'[opt] 'module' module-name module-partition[opt]
464   ///     attribute-specifier-seq[opt] ';'
465   ///
466   ///   module-name:
467   ///     module-name-qualifier[opt] identifier
468   ///
469   ///   module-partition:
470   ///     ':' module-name-qualifier[opt] identifier
471   ///
472   ///   module-name-qualifier:
473   ///     identifier '.'
474   ///     module-name-qualifier identifier '.'
475   ///
476   /// Transition state:
477   ///
478   ///   NotAModuleDecl --- export ---> FoundExport
479   ///   NotAModuleDecl --- module ---> ImplementationCandidate
480   ///   FoundExport --- module ---> InterfaceCandidate
481   ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
482   ///   ImplementationCandidate --- period ---> ImplementationCandidate
483   ///   ImplementationCandidate --- colon ---> ImplementationCandidate
484   ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
485   ///   InterfaceCandidate --- period ---> InterfaceCandidate
486   ///   InterfaceCandidate --- colon ---> InterfaceCandidate
487   ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
  ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
489   ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
490   ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
491   ///
  /// FIXME: We don't handle attribute-specifier-seq here yet. This should not
  /// matter for now since we don't support any module attributes yet.
494   class ModuleDeclSeq {
495     enum ModuleDeclState : int {
496       NotAModuleDecl,
497       FoundExport,
498       InterfaceCandidate,
499       ImplementationCandidate,
500       NamedModuleInterface,
501       NamedModuleImplementation,
502     };
503 
504   public:
505     ModuleDeclSeq() = default;
506 
handleExport()507     void handleExport() {
508       if (State == NotAModuleDecl)
509         State = FoundExport;
510       else if (!isNamedModule())
511         reset();
512     }
513 
handleModule()514     void handleModule() {
515       if (State == FoundExport)
516         State = InterfaceCandidate;
517       else if (State == NotAModuleDecl)
518         State = ImplementationCandidate;
519       else if (!isNamedModule())
520         reset();
521     }
522 
handleIdentifier(IdentifierInfo * Identifier)523     void handleIdentifier(IdentifierInfo *Identifier) {
524       if (isModuleCandidate() && Identifier)
525         Name += Identifier->getName().str();
526       else if (!isNamedModule())
527         reset();
528     }
529 
handleColon()530     void handleColon() {
531       if (isModuleCandidate())
532         Name += ":";
533       else if (!isNamedModule())
534         reset();
535     }
536 
handlePeriod()537     void handlePeriod() {
538       if (isModuleCandidate())
539         Name += ".";
540       else if (!isNamedModule())
541         reset();
542     }
543 
handleSemi()544     void handleSemi() {
545       if (!Name.empty() && isModuleCandidate()) {
546         if (State == InterfaceCandidate)
547           State = NamedModuleInterface;
548         else if (State == ImplementationCandidate)
549           State = NamedModuleImplementation;
550         else
551           llvm_unreachable("Unimaged ModuleDeclState.");
552       } else if (!isNamedModule())
553         reset();
554     }
555 
handleMisc()556     void handleMisc() {
557       if (!isNamedModule())
558         reset();
559     }
560 
isModuleCandidate()561     bool isModuleCandidate() const {
562       return State == InterfaceCandidate || State == ImplementationCandidate;
563     }
564 
isNamedModule()565     bool isNamedModule() const {
566       return State == NamedModuleInterface ||
567              State == NamedModuleImplementation;
568     }
569 
isNamedInterface()570     bool isNamedInterface() const { return State == NamedModuleInterface; }
571 
isImplementationUnit()572     bool isImplementationUnit() const {
573       return State == NamedModuleImplementation && !getName().contains(':');
574     }
575 
getName()576     StringRef getName() const {
577       assert(isNamedModule() && "Can't get name from a non named module");
578       return Name;
579     }
580 
getPrimaryName()581     StringRef getPrimaryName() const {
582       assert(isNamedModule() && "Can't get name from a non named module");
583       return getName().split(':').first;
584     }
585 
reset()586     void reset() {
587       Name.clear();
588       State = NotAModuleDecl;
589     }
590 
591   private:
592     ModuleDeclState State = NotAModuleDecl;
593     std::string Name;
594   };
595 
596   ModuleDeclSeq ModuleDeclState;
597 
598   /// Whether the module import expects an identifier next. Otherwise,
599   /// it expects a '.' or ';'.
600   bool ModuleImportExpectsIdentifier = false;
601 
602   /// The identifier and source location of the currently-active
603   /// \#pragma clang arc_cf_code_audited begin.
604   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
605 
606   /// The source location of the currently-active
607   /// \#pragma clang assume_nonnull begin.
608   SourceLocation PragmaAssumeNonNullLoc;
609 
610   /// Set only for preambles which end with an active
611   /// \#pragma clang assume_nonnull begin.
612   ///
613   /// When the preamble is loaded into the main file,
614   /// `PragmaAssumeNonNullLoc` will be set to this to
615   /// replay the unterminated assume_nonnull.
616   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
617 
618   /// True if we hit the code-completion point.
619   bool CodeCompletionReached = false;
620 
621   /// The code completion token containing the information
622   /// on the stem that is to be code completed.
623   IdentifierInfo *CodeCompletionII = nullptr;
624 
625   /// Range for the code completion token.
626   SourceRange CodeCompletionTokenRange;
627 
628   /// The directory that the main file should be considered to occupy,
629   /// if it does not correspond to a real file (as happens when building a
630   /// module).
631   OptionalDirectoryEntryRef MainFileDir;
632 
633   /// The number of bytes that we will initially skip when entering the
634   /// main file, along with a flag that indicates whether skipping this number
635   /// of bytes will place the lexer at the start of a line.
636   ///
637   /// This is used when loading a precompiled preamble.
638   std::pair<int, bool> SkipMainFilePreamble;
639 
640   /// Whether we hit an error due to reaching max allowed include depth. Allows
641   /// to avoid hitting the same error over and over again.
642   bool HasReachedMaxIncludeDepth = false;
643 
644   /// The number of currently-active calls to Lex.
645   ///
646   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
647   /// require asking for multiple additional tokens. This counter makes it
648   /// possible for Lex to detect whether it's producing a token for the end
649   /// of phase 4 of translation or for some other situation.
650   unsigned LexLevel = 0;
651 
652   /// The number of (LexLevel 0) preprocessor tokens.
653   unsigned TokenCount = 0;
654 
655   /// Preprocess every token regardless of LexLevel.
656   bool PreprocessToken = false;
657 
658   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
659   /// warning, or zero for unlimited.
660   unsigned MaxTokens = 0;
661   SourceLocation MaxTokensOverrideLoc;
662 
663 public:
664   struct PreambleSkipInfo {
665     SourceLocation HashTokenLoc;
666     SourceLocation IfTokenLoc;
667     bool FoundNonSkipPortion;
668     bool FoundElse;
669     SourceLocation ElseLoc;
670 
PreambleSkipInfoPreambleSkipInfo671     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
672                      bool FoundNonSkipPortion, bool FoundElse,
673                      SourceLocation ElseLoc)
674         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
675           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
676           ElseLoc(ElseLoc) {}
677   };
678 
679   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
680 
681 private:
682   friend class ASTReader;
683   friend class MacroArgs;
684 
685   class PreambleConditionalStackStore {
686     enum State {
687       Off = 0,
688       Recording = 1,
689       Replaying = 2,
690     };
691 
692   public:
693     PreambleConditionalStackStore() = default;
694 
startRecording()695     void startRecording() { ConditionalStackState = Recording; }
startReplaying()696     void startReplaying() { ConditionalStackState = Replaying; }
isRecording()697     bool isRecording() const { return ConditionalStackState == Recording; }
isReplaying()698     bool isReplaying() const { return ConditionalStackState == Replaying; }
699 
getStack()700     ArrayRef<PPConditionalInfo> getStack() const {
701       return ConditionalStack;
702     }
703 
doneReplaying()704     void doneReplaying() {
705       ConditionalStack.clear();
706       ConditionalStackState = Off;
707     }
708 
setStack(ArrayRef<PPConditionalInfo> s)709     void setStack(ArrayRef<PPConditionalInfo> s) {
710       if (!isRecording() && !isReplaying())
711         return;
712       ConditionalStack.clear();
713       ConditionalStack.append(s.begin(), s.end());
714     }
715 
hasRecordedPreamble()716     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
717 
reachedEOFWhileSkipping()718     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
719 
clearSkipInfo()720     void clearSkipInfo() { SkipInfo.reset(); }
721 
722     std::optional<PreambleSkipInfo> SkipInfo;
723 
724   private:
725     SmallVector<PPConditionalInfo, 4> ConditionalStack;
726     State ConditionalStackState = Off;
727   } PreambleConditionalStack;
728 
729   /// The current top of the stack that we're lexing from if
730   /// not expanding a macro and we are lexing directly from source code.
731   ///
732   /// Only one of CurLexer, or CurTokenLexer will be non-null.
733   std::unique_ptr<Lexer> CurLexer;
734 
735   /// The current top of the stack that we're lexing from
736   /// if not expanding a macro.
737   ///
738   /// This is an alias for CurLexer.
739   PreprocessorLexer *CurPPLexer = nullptr;
740 
741   /// Used to find the current FileEntry, if CurLexer is non-null
742   /// and if applicable.
743   ///
744   /// This allows us to implement \#include_next and find directory-specific
745   /// properties.
746   ConstSearchDirIterator CurDirLookup = nullptr;
747 
748   /// The current macro we are expanding, if we are expanding a macro.
749   ///
750   /// One of CurLexer and CurTokenLexer must be null.
751   std::unique_ptr<TokenLexer> CurTokenLexer;
752 
753   /// The kind of lexer we're currently working with.
754   typedef bool (*LexerCallback)(Preprocessor &, Token &);
755   LexerCallback CurLexerCallback = &CLK_Lexer;
756 
757   /// If the current lexer is for a submodule that is being built, this
758   /// is that submodule.
759   Module *CurLexerSubmodule = nullptr;
760 
761   /// Keeps track of the stack of files currently
762   /// \#included, and macros currently being expanded from, not counting
763   /// CurLexer/CurTokenLexer.
764   struct IncludeStackInfo {
765     LexerCallback               CurLexerCallback;
766     Module                     *TheSubmodule;
767     std::unique_ptr<Lexer>      TheLexer;
768     PreprocessorLexer          *ThePPLexer;
769     std::unique_ptr<TokenLexer> TheTokenLexer;
770     ConstSearchDirIterator      TheDirLookup;
771 
772     // The following constructors are completely useless copies of the default
773     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo774     IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
775                      std::unique_ptr<Lexer> &&TheLexer,
776                      PreprocessorLexer *ThePPLexer,
777                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
778                      ConstSearchDirIterator TheDirLookup)
779         : CurLexerCallback(std::move(CurLexerCallback)),
780           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
781           ThePPLexer(std::move(ThePPLexer)),
782           TheTokenLexer(std::move(TheTokenLexer)),
783           TheDirLookup(std::move(TheDirLookup)) {}
784   };
785   std::vector<IncludeStackInfo> IncludeMacroStack;
786 
787   /// Actions invoked when some preprocessor activity is
788   /// encountered (e.g. a file is \#included, etc).
789   std::unique_ptr<PPCallbacks> Callbacks;
790 
791   struct MacroExpandsInfo {
792     Token Tok;
793     MacroDefinition MD;
794     SourceRange Range;
795 
MacroExpandsInfoMacroExpandsInfo796     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
797         : Tok(Tok), MD(MD), Range(Range) {}
798   };
799   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
800 
801   /// Information about a name that has been used to define a module macro.
802   struct ModuleMacroInfo {
803     /// The most recent macro directive for this identifier.
804     MacroDirective *MD;
805 
806     /// The active module macros for this identifier.
807     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
808 
809     /// The generation number at which we last updated ActiveModuleMacros.
810     /// \see Preprocessor::VisibleModules.
811     unsigned ActiveModuleMacrosGeneration = 0;
812 
813     /// Whether this macro name is ambiguous.
814     bool IsAmbiguous = false;
815 
816     /// The module macros that are overridden by this macro.
817     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
818 
ModuleMacroInfoModuleMacroInfo819     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
820   };
821 
822   /// The state of a macro for an identifier.
823   class MacroState {
824     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
825 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)826     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
827                                    const IdentifierInfo *II) const {
828       if (II->isOutOfDate())
829         PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
830       // FIXME: Find a spare bit on IdentifierInfo and store a
831       //        HasModuleMacros flag.
832       if (!II->hasMacroDefinition() ||
833           (!PP.getLangOpts().Modules &&
834            !PP.getLangOpts().ModulesLocalVisibility) ||
835           !PP.CurSubmoduleState->VisibleModules.getGeneration())
836         return nullptr;
837 
838       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
839       if (!Info) {
840         Info = new (PP.getPreprocessorAllocator())
841             ModuleMacroInfo(State.get<MacroDirective *>());
842         State = Info;
843       }
844 
845       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
846           Info->ActiveModuleMacrosGeneration)
847         PP.updateModuleMacroInfo(II, *Info);
848       return Info;
849     }
850 
851   public:
MacroState()852     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)853     MacroState(MacroDirective *MD) : State(MD) {}
854 
MacroState(MacroState && O)855     MacroState(MacroState &&O) noexcept : State(O.State) {
856       O.State = (MacroDirective *)nullptr;
857     }
858 
859     MacroState &operator=(MacroState &&O) noexcept {
860       auto S = O.State;
861       O.State = (MacroDirective *)nullptr;
862       State = S;
863       return *this;
864     }
865 
~MacroState()866     ~MacroState() {
867       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
868         Info->~ModuleMacroInfo();
869     }
870 
getLatest()871     MacroDirective *getLatest() const {
872       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
873         return Info->MD;
874       return State.get<MacroDirective*>();
875     }
876 
setLatest(MacroDirective * MD)877     void setLatest(MacroDirective *MD) {
878       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
879         Info->MD = MD;
880       else
881         State = MD;
882     }
883 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)884     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
885       auto *Info = getModuleInfo(PP, II);
886       return Info ? Info->IsAmbiguous : false;
887     }
888 
889     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)890     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
891       if (auto *Info = getModuleInfo(PP, II))
892         return Info->ActiveModuleMacros;
893       return std::nullopt;
894     }
895 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)896     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
897                                                SourceManager &SourceMgr) const {
898       // FIXME: Incorporate module macros into the result of this.
899       if (auto *Latest = getLatest())
900         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
901       return {};
902     }
903 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)904     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
905       if (auto *Info = getModuleInfo(PP, II)) {
906         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
907                                       Info->ActiveModuleMacros.begin(),
908                                       Info->ActiveModuleMacros.end());
909         Info->ActiveModuleMacros.clear();
910         Info->IsAmbiguous = false;
911       }
912     }
913 
getOverriddenMacros()914     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
915       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
916         return Info->OverriddenMacros;
917       return std::nullopt;
918     }
919 
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)920     void setOverriddenMacros(Preprocessor &PP,
921                              ArrayRef<ModuleMacro *> Overrides) {
922       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
923       if (!Info) {
924         if (Overrides.empty())
925           return;
926         Info = new (PP.getPreprocessorAllocator())
927             ModuleMacroInfo(State.get<MacroDirective *>());
928         State = Info;
929       }
930       Info->OverriddenMacros.clear();
931       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
932                                     Overrides.begin(), Overrides.end());
933       Info->ActiveModuleMacrosGeneration = 0;
934     }
935   };
936 
937   /// For each IdentifierInfo that was associated with a macro, we
938   /// keep a mapping to the history of all macro definitions and #undefs in
939   /// the reverse order (the latest one is in the head of the list).
940   ///
941   /// This mapping lives within the \p CurSubmoduleState.
942   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
943 
944   struct SubmoduleState;
945 
946   /// Information about a submodule that we're currently building.
947   struct BuildingSubmoduleInfo {
948     /// The module that we are building.
949     Module *M;
950 
951     /// The location at which the module was included.
952     SourceLocation ImportLoc;
953 
954     /// Whether we entered this submodule via a pragma.
955     bool IsPragma;
956 
957     /// The previous SubmoduleState.
958     SubmoduleState *OuterSubmoduleState;
959 
960     /// The number of pending module macro names when we started building this.
961     unsigned OuterPendingModuleMacroNames;
962 
BuildingSubmoduleInfoBuildingSubmoduleInfo963     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
964                           SubmoduleState *OuterSubmoduleState,
965                           unsigned OuterPendingModuleMacroNames)
966         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
967           OuterSubmoduleState(OuterSubmoduleState),
968           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
969   };
970   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
971 
  /// Information about a submodule's preprocessor state: its macro table and
  /// the set of modules visible within it.
  struct SubmoduleState {
    /// The macros for the submodule, keyed by identifier.
    MacroMap Macros;

    /// The set of modules that are visible within the submodule.
    VisibleModuleSet VisibleModules;

    // FIXME: CounterValue?
    // FIXME: PragmaPushMacroInfo?
  };
983   std::map<Module *, SubmoduleState> Submodules;
984 
985   /// The preprocessor state for preprocessing outside of any submodule.
986   SubmoduleState NullSubmoduleState;
987 
988   /// The current submodule state. Will be \p NullSubmoduleState if we're not
989   /// in a submodule.
990   SubmoduleState *CurSubmoduleState;
991 
992   /// The files that have been included.
993   IncludedFilesSet IncludedFiles;
994 
995   /// The set of top-level modules that affected preprocessing, but were not
996   /// imported.
997   llvm::SmallSetVector<Module *, 2> AffectingClangModules;
998 
999   /// The set of known macros exported from modules.
1000   llvm::FoldingSet<ModuleMacro> ModuleMacros;
1001 
1002   /// The names of potential module macros that we've not yet processed.
1003   llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
1004 
1005   /// The list of module macros, for each identifier, that are not overridden by
1006   /// any other module macro.
1007   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1008       LeafModuleMacros;
1009 
  /// Locations of macros for which we want to warn if they are still unused
  /// at the end of the translation unit.
1012   ///
1013   /// We store just their SourceLocations instead of
1014   /// something like MacroInfo*. The benefit of this is that when we are
1015   /// deserializing from PCH, we don't need to deserialize identifier & macros
1016   /// just so that we can report that they are unused, we just warn using
1017   /// the SourceLocations of this set (that will be filled by the ASTReader).
1018   using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1019   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1020 
1021   /// This is a pair of an optional message and source location used for pragmas
1022   /// that annotate macros like pragma clang restrict_expansion and pragma clang
1023   /// deprecated. This pair stores the optional message and the location of the
1024   /// annotation pragma for use producing diagnostics and notes.
1025   using MsgLocationPair = std::pair<std::string, SourceLocation>;
1026 
  /// The source location and message recorded for a single macro annotation.
  struct MacroAnnotationInfo {
    SourceLocation Location;
    std::string Message;
  };
1031 
1032   struct MacroAnnotations {
1033     std::optional<MacroAnnotationInfo> DeprecationInfo;
1034     std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1035     std::optional<SourceLocation> FinalAnnotationLoc;
1036 
makeDeprecationMacroAnnotations1037     static MacroAnnotations makeDeprecation(SourceLocation Loc,
1038                                             std::string Msg) {
1039       return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
1040                               std::nullopt, std::nullopt};
1041     }
1042 
makeRestrictExpansionMacroAnnotations1043     static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
1044                                                   std::string Msg) {
1045       return MacroAnnotations{
1046           std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt};
1047     }
1048 
makeFinalMacroAnnotations1049     static MacroAnnotations makeFinal(SourceLocation Loc) {
1050       return MacroAnnotations{std::nullopt, std::nullopt, Loc};
1051     }
1052   };
1053 
1054   /// Warning information for macro annotations.
1055   llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1056 
  /// A "freelist" of MacroArgs objects that can be
  /// reused for quick allocation.
1059   MacroArgs *MacroArgCache = nullptr;
1060 
1061   /// For each IdentifierInfo used in a \#pragma push_macro directive,
1062   /// we keep a MacroInfo stack used to restore the previous macro value.
1063   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1064       PragmaPushMacroInfo;
1065 
1066   // Various statistics we track for performance analysis.
1067   unsigned NumDirectives = 0;
1068   unsigned NumDefined = 0;
1069   unsigned NumUndefined = 0;
1070   unsigned NumPragma = 0;
1071   unsigned NumIf = 0;
1072   unsigned NumElse = 0;
1073   unsigned NumEndif = 0;
1074   unsigned NumEnteredSourceFiles = 0;
1075   unsigned MaxIncludeStackDepth = 0;
1076   unsigned NumMacroExpanded = 0;
1077   unsigned NumFnMacroExpanded = 0;
1078   unsigned NumBuiltinMacroExpanded = 0;
1079   unsigned NumFastMacroExpanded = 0;
1080   unsigned NumTokenPaste = 0;
1081   unsigned NumFastTokenPaste = 0;
1082   unsigned NumSkipped = 0;
1083 
1084   /// The predefined macros that preprocessor should use from the
1085   /// command line etc.
1086   std::string Predefines;
1087 
1088   /// The file ID for the preprocessor predefines.
1089   FileID PredefinesFileID;
1090 
1091   /// The file ID for the PCH through header.
1092   FileID PCHThroughHeaderFileID;
1093 
1094   /// Whether tokens are being skipped until a #pragma hdrstop is seen.
1095   bool SkippingUntilPragmaHdrStop = false;
1096 
1097   /// Whether tokens are being skipped until the through header is seen.
1098   bool SkippingUntilPCHThroughHeader = false;
1099 
1100   /// \{
1101   /// Cache of macro expanders to reduce malloc traffic.
1102   enum { TokenLexerCacheSize = 8 };
1103   unsigned NumCachedTokenLexers;
1104   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1105   /// \}
1106 
  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
1112   SmallVector<Token, 16> MacroExpandedTokens;
1113   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1114 
1115   /// A record of the macro definitions and expansions that
1116   /// occurred during preprocessing.
1117   ///
1118   /// This is an optional side structure that can be enabled with
1119   /// \c createPreprocessingRecord() prior to preprocessing.
1120   PreprocessingRecord *Record = nullptr;
1121 
1122   /// Cached tokens state.
1123   using CachedTokensTy = SmallVector<Token, 1>;
1124 
1125   /// Cached tokens are stored here when we do backtracking or
1126   /// lookahead. They are "lexed" by the CachingLex() method.
1127   CachedTokensTy CachedTokens;
1128 
1129   /// The position of the cached token that CachingLex() should
1130   /// "lex" next.
1131   ///
1132   /// If it points beyond the CachedTokens vector, it means that a normal
1133   /// Lex() should be invoked.
1134   CachedTokensTy::size_type CachedLexPos = 0;
1135 
1136   /// Stack of backtrack positions, allowing nested backtracks.
1137   ///
1138   /// The EnableBacktrackAtThisPos() method pushes a position to
1139   /// indicate where CachedLexPos should be set when the BackTrack() method is
1140   /// invoked (at which point the last position is popped).
1141   std::vector<CachedTokensTy::size_type> BacktrackPositions;
1142 
1143   /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1144   /// This is used to guard against calling this function recursively.
1145   ///
1146   /// See comments at the use-site for more context about why it is needed.
1147   bool SkippingExcludedConditionalBlock = false;
1148 
1149   /// Keeps track of skipped range mappings that were recorded while skipping
1150   /// excluded conditional directives. It maps the source buffer pointer at
1151   /// the beginning of a skipped block, to the number of bytes that should be
1152   /// skipped.
1153   llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1154 
1155   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
1156 
1157 public:
1158   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
1159                DiagnosticsEngine &diags, const LangOptions &LangOpts,
1160                SourceManager &SM, HeaderSearch &Headers,
1161                ModuleLoader &TheModuleLoader,
1162                IdentifierInfoLookup *IILookup = nullptr,
1163                bool OwnsHeaderSearch = false,
1164                TranslationUnitKind TUKind = TU_Complete);
1165 
1166   ~Preprocessor();
1167 
1168   /// Initialize the preprocessor using information about the target.
1169   ///
1170   /// \param Target is owned by the caller and must remain valid for the
1171   /// lifetime of the preprocessor.
1172   /// \param AuxTarget is owned by the caller and must remain valid for
1173   /// the lifetime of the preprocessor.
1174   void Initialize(const TargetInfo &Target,
1175                   const TargetInfo *AuxTarget = nullptr);
1176 
  /// Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
1183   void InitializeForModelFile();
1184 
  /// Clean up after model file parsing.
1186   void FinalizeForModelFile();
1187 
1188   /// Retrieve the preprocessor options used to initialize this
1189   /// preprocessor.
getPreprocessorOpts()1190   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
1191 
getDiagnostics()1192   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
setDiagnostics(DiagnosticsEngine & D)1193   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1194 
getLangOpts()1195   const LangOptions &getLangOpts() const { return LangOpts; }
getTargetInfo()1196   const TargetInfo &getTargetInfo() const { return *Target; }
getAuxTargetInfo()1197   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
getFileManager()1198   FileManager &getFileManager() const { return FileMgr; }
getSourceManager()1199   SourceManager &getSourceManager() const { return SourceMgr; }
getHeaderSearchInfo()1200   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1201 
getIdentifierTable()1202   IdentifierTable &getIdentifierTable() { return Identifiers; }
getIdentifierTable()1203   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
getSelectorTable()1204   SelectorTable &getSelectorTable() { return Selectors; }
getBuiltinInfo()1205   Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
getPreprocessorAllocator()1206   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1207 
setExternalSource(ExternalPreprocessorSource * Source)1208   void setExternalSource(ExternalPreprocessorSource *Source) {
1209     ExternalSource = Source;
1210   }
1211 
getExternalSource()1212   ExternalPreprocessorSource *getExternalSource() const {
1213     return ExternalSource;
1214   }
1215 
1216   /// Retrieve the module loader associated with this preprocessor.
getModuleLoader()1217   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1218 
hadModuleLoaderFatalFailure()1219   bool hadModuleLoaderFatalFailure() const {
1220     return TheModuleLoader.HadFatalFailure;
1221   }
1222 
1223   /// Retrieve the number of Directives that have been processed by the
1224   /// Preprocessor.
getNumDirectives()1225   unsigned getNumDirectives() const {
1226     return NumDirectives;
1227   }
1228 
1229   /// True if we are currently preprocessing a #if or #elif directive
isParsingIfOrElifDirective()1230   bool isParsingIfOrElifDirective() const {
1231     return ParsingIfOrElifDirective;
1232   }
1233 
1234   /// Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1235   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1236     this->KeepComments = KeepComments | KeepMacroComments;
1237     this->KeepMacroComments = KeepMacroComments;
1238   }
1239 
getCommentRetentionState()1240   bool getCommentRetentionState() const { return KeepComments; }
1241 
setPragmasEnabled(bool Enabled)1242   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
getPragmasEnabled()1243   bool getPragmasEnabled() const { return PragmasEnabled; }
1244 
SetSuppressIncludeNotFoundError(bool Suppress)1245   void SetSuppressIncludeNotFoundError(bool Suppress) {
1246     SuppressIncludeNotFoundError = Suppress;
1247   }
1248 
GetSuppressIncludeNotFoundError()1249   bool GetSuppressIncludeNotFoundError() {
1250     return SuppressIncludeNotFoundError;
1251   }
1252 
1253   /// Sets whether the preprocessor is responsible for producing output or if
1254   /// it is producing tokens to be consumed by Parse and Sema.
setPreprocessedOutput(bool IsPreprocessedOutput)1255   void setPreprocessedOutput(bool IsPreprocessedOutput) {
1256     PreprocessedOutput = IsPreprocessedOutput;
1257   }
1258 
1259   /// Returns true if the preprocessor is responsible for generating output,
1260   /// false if it is producing tokens to be consumed by Parse and Sema.
isPreprocessedOutput()1261   bool isPreprocessedOutput() const { return PreprocessedOutput; }
1262 
1263   /// Return true if we are lexing directly from the specified lexer.
isCurrentLexer(const PreprocessorLexer * L)1264   bool isCurrentLexer(const PreprocessorLexer *L) const {
1265     return CurPPLexer == L;
1266   }
1267 
1268   /// Return the current lexer being lexed from.
1269   ///
1270   /// Note that this ignores any potentially active macro expansions and _Pragma
1271   /// expansions going on at the time.
getCurrentLexer()1272   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1273 
1274   /// Return the current file lexer being lexed from.
1275   ///
1276   /// Note that this ignores any potentially active macro expansions and _Pragma
1277   /// expansions going on at the time.
1278   PreprocessorLexer *getCurrentFileLexer() const;
1279 
1280   /// Return the submodule owning the file being lexed. This may not be
1281   /// the current module if we have changed modules since entering the file.
getCurrentLexerSubmodule()1282   Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1283 
1284   /// Returns the FileID for the preprocessor predefines.
getPredefinesFileID()1285   FileID getPredefinesFileID() const { return PredefinesFileID; }
1286 
1287   /// \{
1288   /// Accessors for preprocessor callbacks.
1289   ///
1290   /// Note that this class takes ownership of any PPCallbacks object given to
1291   /// it.
getPPCallbacks()1292   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)1293   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1294     if (Callbacks)
1295       C = std::make_unique<PPChainedCallbacks>(std::move(C),
1296                                                 std::move(Callbacks));
1297     Callbacks = std::move(C);
1298   }
1299   /// \}
1300 
1301   /// Get the number of tokens processed so far.
getTokenCount()1302   unsigned getTokenCount() const { return TokenCount; }
1303 
1304   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
getMaxTokens()1305   unsigned getMaxTokens() const { return MaxTokens; }
1306 
overrideMaxTokens(unsigned Value,SourceLocation Loc)1307   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1308     MaxTokens = Value;
1309     MaxTokensOverrideLoc = Loc;
1310   };
1311 
getMaxTokensOverrideLoc()1312   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1313 
1314   /// Register a function that would be called on each token in the final
1315   /// expanded token stream.
1316   /// This also reports annotation tokens produced by the parser.
setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1317   void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1318     OnToken = std::move(F);
1319   }
1320 
setPreprocessToken(bool Preprocess)1321   void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1322 
isMacroDefined(StringRef Id)1323   bool isMacroDefined(StringRef Id) {
1324     return isMacroDefined(&Identifiers.get(Id));
1325   }
isMacroDefined(const IdentifierInfo * II)1326   bool isMacroDefined(const IdentifierInfo *II) {
1327     return II->hasMacroDefinition() &&
1328            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1329   }
1330 
1331   /// Determine whether II is defined as a macro within the module M,
1332   /// if that is a module that we've already preprocessed. Does not check for
1333   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1334   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1335     if (!II->hasMacroDefinition())
1336       return false;
1337     auto I = Submodules.find(M);
1338     if (I == Submodules.end())
1339       return false;
1340     auto J = I->second.Macros.find(II);
1341     if (J == I->second.Macros.end())
1342       return false;
1343     auto *MD = J->second.getLatest();
1344     return MD && MD->isDefined();
1345   }
1346 
getMacroDefinition(const IdentifierInfo * II)1347   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1348     if (!II->hasMacroDefinition())
1349       return {};
1350 
1351     MacroState &S = CurSubmoduleState->Macros[II];
1352     auto *MD = S.getLatest();
1353     while (MD && isa<VisibilityMacroDirective>(MD))
1354       MD = MD->getPrevious();
1355     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1356                            S.getActiveModuleMacros(*this, II),
1357                            S.isAmbiguous(*this, II));
1358   }
1359 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1360   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1361                                           SourceLocation Loc) {
1362     if (!II->hadMacroDefinition())
1363       return {};
1364 
1365     MacroState &S = CurSubmoduleState->Macros[II];
1366     MacroDirective::DefInfo DI;
1367     if (auto *MD = S.getLatest())
1368       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1369     // FIXME: Compute the set of active module macros at the specified location.
1370     return MacroDefinition(DI.getDirective(),
1371                            S.getActiveModuleMacros(*this, II),
1372                            S.isAmbiguous(*this, II));
1373   }
1374 
1375   /// Given an identifier, return its latest non-imported MacroDirective
1376   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)1377   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1378     if (!II->hasMacroDefinition())
1379       return nullptr;
1380 
1381     auto *MD = getLocalMacroDirectiveHistory(II);
1382     if (!MD || MD->getDefinition().isUndefined())
1383       return nullptr;
1384 
1385     return MD;
1386   }
1387 
getMacroInfo(const IdentifierInfo * II)1388   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1389     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1390   }
1391 
getMacroInfo(const IdentifierInfo * II)1392   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1393     if (!II->hasMacroDefinition())
1394       return nullptr;
1395     if (auto MD = getMacroDefinition(II))
1396       return MD.getMacroInfo();
1397     return nullptr;
1398   }
1399 
1400   /// Given an identifier, return the latest non-imported macro
1401   /// directive for that identifier.
1402   ///
1403   /// One can iterate over all previous macro directives from the most recent
1404   /// one.
1405   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1406 
1407   /// Add a directive to the macro directive history for this identifier.
1408   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1409   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1410                                              SourceLocation Loc) {
1411     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1412     appendMacroDirective(II, MD);
1413     return MD;
1414   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1415   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1416                                              MacroInfo *MI) {
1417     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1418   }
1419 
1420   /// Set a MacroDirective that was loaded from a PCH file.
1421   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1422                                MacroDirective *MD);
1423 
1424   /// Register an exported macro for a module and identifier.
1425   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1426                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1427   ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1428 
1429   /// Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)1430   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1431     if (II->isOutOfDate())
1432       updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1433     auto I = LeafModuleMacros.find(II);
1434     if (I != LeafModuleMacros.end())
1435       return I->second;
1436     return std::nullopt;
1437   }
1438 
1439   /// Get the list of submodules that we're currently building.
getBuildingSubmodules()1440   ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1441     return BuildingSubmoduleStack;
1442   }
1443 
1444   /// \{
1445   /// Iterators for the macro history table. Currently defined macros have
1446   /// IdentifierInfo::hasMacroDefinition() set and an empty
1447   /// MacroInfo::getUndefLoc() at the head of the list.
1448   using macro_iterator = MacroMap::const_iterator;
1449 
1450   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1451   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1452 
1453   llvm::iterator_range<macro_iterator>
1454   macros(bool IncludeExternalMacros = true) const {
1455     macro_iterator begin = macro_begin(IncludeExternalMacros);
1456     macro_iterator end = macro_end(IncludeExternalMacros);
1457     return llvm::make_range(begin, end);
1458   }
1459 
1460   /// \}
1461 
1462   /// Mark the given clang module as affecting the current clang module or translation unit.
markClangModuleAsAffecting(Module * M)1463   void markClangModuleAsAffecting(Module *M) {
1464     assert(M->isModuleMapModule());
1465     if (!BuildingSubmoduleStack.empty()) {
1466       if (M != BuildingSubmoduleStack.back().M)
1467         BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
1468     } else {
1469       AffectingClangModules.insert(M);
1470     }
1471   }
1472 
1473   /// Get the set of top-level clang modules that affected preprocessing, but were not
1474   /// imported.
getAffectingClangModules()1475   const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
1476     return AffectingClangModules;
1477   }
1478 
1479   /// Mark the file as included.
1480   /// Returns true if this is the first time the file was included.
markIncluded(FileEntryRef File)1481   bool markIncluded(FileEntryRef File) {
1482     HeaderInfo.getFileInfo(File);
1483     return IncludedFiles.insert(File).second;
1484   }
1485 
1486   /// Return true if this header has already been included.
alreadyIncluded(FileEntryRef File)1487   bool alreadyIncluded(FileEntryRef File) const {
1488     HeaderInfo.getFileInfo(File);
1489     return IncludedFiles.count(File);
1490   }
1491 
1492   /// Get the set of included files.
getIncludedFiles()1493   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
getIncludedFiles()1494   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1495 
1496   /// Return the name of the macro defined before \p Loc that has
1497   /// spelling \p Tokens.  If there are multiple macros with same spelling,
1498   /// return the last one defined.
1499   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1500                                      ArrayRef<TokenValue> Tokens) const;
1501 
1502   /// Get the predefines for this processor.
1503   /// Used by some third-party tools to inspect and add predefines (see
1504   /// https://github.com/llvm/llvm-project/issues/57483).
getPredefines()1505   const std::string &getPredefines() const { return Predefines; }
1506 
1507   /// Set the predefines for this Preprocessor.
1508   ///
1509   /// These predefines are automatically injected when parsing the main file.
setPredefines(std::string P)1510   void setPredefines(std::string P) { Predefines = std::move(P); }
1511 
1512   /// Return information about the specified preprocessor
1513   /// identifier token.
getIdentifierInfo(StringRef Name)1514   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1515     return &Identifiers.get(Name);
1516   }
1517 
1518   /// Add the specified pragma handler to this preprocessor.
1519   ///
1520   /// If \p Namespace is non-null, then it is a token required to exist on the
1521   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1522   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
AddPragmaHandler(PragmaHandler * Handler)1523   void AddPragmaHandler(PragmaHandler *Handler) {
1524     AddPragmaHandler(StringRef(), Handler);
1525   }
1526 
1527   /// Remove the specific pragma handler from this preprocessor.
1528   ///
1529   /// If \p Namespace is non-null, then it should be the namespace that
1530   /// \p Handler was added to. It is an error to remove a handler that
1531   /// has not been registered.
1532   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
RemovePragmaHandler(PragmaHandler * Handler)1533   void RemovePragmaHandler(PragmaHandler *Handler) {
1534     RemovePragmaHandler(StringRef(), Handler);
1535   }
1536 
  /// Install empty handlers for all pragmas (making them ignored).
  // NOTE(review): whether previously registered handlers are kept or replaced
  // is not visible from this declaration -- confirm against the definition.
  void IgnorePragmas();
1539 
1540   /// Set empty line handler.
setEmptylineHandler(EmptylineHandler * Handler)1541   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1542 
getEmptylineHandler()1543   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1544 
  /// Add the specified comment handler to the preprocessor.
  ///
  /// \param Handler the handler to register.
  // NOTE(review): ownership of \p Handler is not visible from this
  // declaration; it is passed as a raw pointer -- confirm who deletes it.
  void addCommentHandler(CommentHandler *Handler);

  /// Remove the specified comment handler.
  ///
  /// It is an error to remove a handler that has not been registered.
  ///
  /// \param Handler a handler previously passed to addCommentHandler().
  void removeCommentHandler(CommentHandler *Handler);
1552 
1553   /// Set the code completion handler to the given object.
setCodeCompletionHandler(CodeCompletionHandler & Handler)1554   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1555     CodeComplete = &Handler;
1556   }
1557 
1558   /// Retrieve the current code-completion handler.
getCodeCompletionHandler()1559   CodeCompletionHandler *getCodeCompletionHandler() const {
1560     return CodeComplete;
1561   }
1562 
1563   /// Clear out the code completion handler.
clearCodeCompletionHandler()1564   void clearCodeCompletionHandler() {
1565     CodeComplete = nullptr;
1566   }
1567 
  /// Hook used by the lexer to invoke the "included file" code
  /// completion point.
  ///
  /// \param Dir presumably the directory part of the include path typed so
  /// far -- TODO confirm against the definition.
  /// \param IsAngled presumably true for angled (<...>) includes and false
  /// for quoted ones -- TODO confirm.
  void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);

  /// Hook used by the lexer to invoke the "natural language" code
  /// completion point.
  void CodeCompleteNaturalLanguage();
1575 
1576   /// Set the code completion token for filtering purposes.
setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1577   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1578     CodeCompletionII = Filter;
1579   }
1580 
1581   /// Set the code completion token range for detecting replacement range later
1582   /// on.
setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1583   void setCodeCompletionTokenRange(const SourceLocation Start,
1584                                    const SourceLocation End) {
1585     CodeCompletionTokenRange = {Start, End};
1586   }
getCodeCompletionTokenRange()1587   SourceRange getCodeCompletionTokenRange() const {
1588     return CodeCompletionTokenRange;
1589   }
1590 
1591   /// Get the code completion token for filtering purposes.
getCodeCompletionFilter()1592   StringRef getCodeCompletionFilter() {
1593     if (CodeCompletionII)
1594       return CodeCompletionII->getName();
1595     return {};
1596   }
1597 
1598   /// Retrieve the preprocessing record, or NULL if there is no
1599   /// preprocessing record.
getPreprocessingRecord()1600   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1601 
  /// Create a new preprocessing record, which will keep track of
  /// all macro expansions, macro definitions, etc.
  // NOTE(review): behavior when a record already exists is not visible from
  // this declaration -- confirm against the definition.
  void createPreprocessingRecord();
1605 
  /// Returns true if the FileEntry is the PCH through header.
  ///
  /// \param FE the file entry to test.
  bool isPCHThroughHeader(const FileEntry *FE);

  /// True if creating a PCH with a through header.
  bool creatingPCHWithThroughHeader();

  /// True if using a PCH with a through header.
  bool usingPCHWithThroughHeader();

  /// True if creating a PCH with a #pragma hdrstop.
  bool creatingPCHWithPragmaHdrStop();

  /// True if using a PCH with a #pragma hdrstop.
  bool usingPCHWithPragmaHdrStop();

  /// Skip tokens until after the #include of the through header or
  /// until after a #pragma hdrstop.
  void SkipTokensWhileUsingPCH();

  /// Process directives while skipping until the through header or
  /// #pragma hdrstop is found.
  ///
  /// \param Result presumably the token being lexed for the directive --
  /// TODO confirm against the definition.
  /// \param HashLoc presumably the location of the directive's '#' -- TODO
  /// confirm.
  void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
                                           SourceLocation HashLoc);
1629 
  /// Enter the specified FileID as the main source file,
  /// which implicitly adds the builtin defines etc.
  // NOTE(review): this function takes no FileID argument; the main file is
  // presumably obtained from the SourceManager -- confirm and reword.
  void EnterMainSourceFile();

  /// Inform the preprocessor callbacks that processing is complete.
  void EndSourceFile();

  /// Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// Emits a diagnostic, doesn't enter the file, and returns true on error.
  ///
  /// \param FID the file to enter.
  /// \param Dir presumably the search directory the file was found in --
  /// TODO confirm against the definition.
  /// \param Loc presumably the location used for the diagnostic on error --
  /// TODO confirm.
  /// \param IsFirstIncludeOfFile defaults to true; presumably whether this is
  /// the first time the file is being included -- TODO confirm.
  bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
                       SourceLocation Loc, bool IsFirstIncludeOfFile = true);

  /// Add a Macro to the top of the include stack and start lexing
  /// tokens from it instead of the current buffer.
  ///
  /// \param Tok presumably the macro name token being expanded -- TODO
  /// confirm against the definition.
  /// \param Macro the macro whose tokens are lexed next.
  /// \param Args specifies the tokens input to a function-like macro.
  /// \param ILEnd specifies the location of the ')' for a function-like macro
  /// or the identifier for an object-like macro.
  void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
                  MacroArgs *Args);
1652 
1653 private:
  /// Add a "macro" context to the top of the include stack,
  /// which will cause the lexer to start returning the specified tokens.
  ///
  /// \param Toks pointer to the first token of the stream.
  /// \param NumToks number of tokens in \p Toks.
  ///
  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
  /// will not be subject to further macro expansion. Otherwise, these tokens
  /// will be re-macro-expanded when/if expansion is enabled.
  ///
  /// If \p OwnsTokens is false, this method assumes that the specified stream
  /// of tokens has a permanent owner somewhere, so they do not need to be
  /// copied. If it is true, it assumes the array of tokens is allocated with
  /// \c new[] and the Preprocessor will delete[] it.
  ///
  /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
  /// set, see the flag documentation for details.
  void EnterTokenStream(const Token *Toks, unsigned NumToks,
                        bool DisableMacroExpansion, bool OwnsTokens,
                        bool IsReinject);
1671 
1672 public:
EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1673   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1674                         bool DisableMacroExpansion, bool IsReinject) {
1675     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1676                      IsReinject);
1677   }
1678 
EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1679   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1680                         bool IsReinject) {
1681     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1682                      IsReinject);
1683   }
1684 
  /// Pop the current lexer/macro exp off the top of the lexer stack.
  ///
  /// This should only be used in situations where the current state of the
  /// top-of-stack lexer is known.
  void RemoveTopOfLexerStack();

  /// From the point that this method is called, and until
  /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
  /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
  /// make the Preprocessor re-lex the same tokens.
  ///
  /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
  /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
  ///
  /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
  /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
  /// tokens will continue indefinitely.
  void EnableBacktrackAtThisPos();

  /// Disable the last EnableBacktrackAtThisPos call.
  ///
  /// Must be paired with a preceding EnableBacktrackAtThisPos() call (see the
  /// note on that function).
  void CommitBacktrackedTokens();

  /// Make Preprocessor re-lex the tokens that were lexed since
  /// EnableBacktrackAtThisPos() was previously called.
  void Backtrack();
1712 
1713   /// True if EnableBacktrackAtThisPos() was called and
1714   /// caching of tokens is on.
isBacktrackEnabled()1715   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1716 
  /// Lex the next token for this preprocessor.
  ///
  /// \param Result receives the lexed token.
  void Lex(Token &Result);

  /// Lex all tokens for this preprocessor until (and excluding) end of file.
  ///
  /// \param Tokens optional (defaults to null); presumably receives the lexed
  /// tokens when non-null -- TODO confirm against the definition.
  void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);

  /// Lex a token, forming a header-name token if possible.
  // NOTE(review): the meaning of the bool result is not visible from this
  // declaration -- confirm against the definition.
  bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);

  // NOTE(review): the next three declarations are undocumented here. Judging
  // by name only, they deal with lexing after a module import, collecting
  // the tokens following a pp-import, and making a module visible at a
  // location -- confirm against the definitions before relying on this.
  bool LexAfterModuleImport(Token &Result);
  void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);

  void makeModuleVisible(Module *M, SourceLocation Loc);
1730 
getModuleImportLoc(Module * M)1731   SourceLocation getModuleImportLoc(Module *M) const {
1732     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1733   }
1734 
1735   /// Lex a string literal, which may be the concatenation of multiple
1736   /// string literals and may even come from macro expansion.
  /// \returns true on success, false if an error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1738   bool LexStringLiteral(Token &Result, std::string &String,
1739                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1740     if (AllowMacroExpansion)
1741       Lex(Result);
1742     else
1743       LexUnexpandedToken(Result);
1744     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1745                                   AllowMacroExpansion);
1746   }
1747 
  /// Complete the lexing of a string literal where the first token has
  /// already been lexed (see LexStringLiteral).
  ///
  /// \param Result on entry, the already-lexed first token.
  /// \param String presumably receives the resulting string contents -- TODO
  /// confirm against the definition.
  /// \param AllowMacroExpansion whether further tokens are lexed with macro
  /// expansion, matching the choice made for the first token.
  bool FinishLexStringLiteral(Token &Result, std::string &String,
                              const char *DiagnosticTag,
                              bool AllowMacroExpansion);
1753 
1754   /// Lex a token.  If it's a comment, keep lexing until we get
1755   /// something not a comment.
1756   ///
1757   /// This is useful in -E -C mode where comments would foul up preprocessor
1758   /// directive handling.
LexNonComment(Token & Result)1759   void LexNonComment(Token &Result) {
1760     do
1761       Lex(Result);
1762     while (Result.getKind() == tok::comment);
1763   }
1764 
1765   /// Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1766   void LexUnexpandedToken(Token &Result) {
1767     // Disable macro expansion.
1768     bool OldVal = DisableMacroExpansion;
1769     DisableMacroExpansion = true;
1770     // Lex the token.
1771     Lex(Result);
1772 
1773     // Reenable it.
1774     DisableMacroExpansion = OldVal;
1775   }
1776 
1777   /// Like LexNonComment, but this disables macro expansion of
1778   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1779   void LexUnexpandedNonComment(Token &Result) {
1780     do
1781       LexUnexpandedToken(Result);
1782     while (Result.getKind() == tok::comment);
1783   }
1784 
  /// Parses a simple integer literal to get its numeric value.  Floating
  /// point literals and user defined literals are rejected.  Used primarily to
  /// handle pragmas that accept integer arguments.
  ///
  /// \param Tok the token holding the literal; taken by non-const reference,
  /// so it may be modified -- TODO confirm how.
  /// \param Value receives the parsed numeric value.
  // NOTE(review): which bool value signals success is not visible from this
  // declaration -- confirm against the definition.
  bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1789 
1790   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1791   void SetMacroExpansionOnlyInDirectives() {
1792     DisableMacroExpansion = true;
1793     MacroExpansionInDirectivesOverride = true;
1794   }
1795 
1796   /// Peeks ahead N tokens and returns that token without consuming any
1797   /// tokens.
1798   ///
1799   /// LookAhead(0) returns the next token that would be returned by Lex(),
1800   /// LookAhead(1) returns the token after it, etc.  This returns normal
1801   /// tokens after phase 5.  As such, it is equivalent to using
1802   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1803   const Token &LookAhead(unsigned N) {
1804     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1805     if (CachedLexPos + N < CachedTokens.size())
1806       return CachedTokens[CachedLexPos+N];
1807     else
1808       return PeekAhead(N+1);
1809   }
1810 
1811   /// When backtracking is enabled and tokens are cached,
  /// this allows reverting a specific number of tokens.
1813   ///
1814   /// Note that the number of tokens being reverted should be up to the last
1815   /// backtrack position, not more.
RevertCachedTokens(unsigned N)1816   void RevertCachedTokens(unsigned N) {
1817     assert(isBacktrackEnabled() &&
1818            "Should only be called when tokens are cached for backtracking");
1819     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1820          && "Should revert tokens up to the last backtrack position, not more");
1821     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1822            "Corrupted backtrack positions ?");
1823     CachedLexPos -= N;
1824   }
1825 
1826   /// Enters a token in the token stream to be lexed next.
1827   ///
  /// If Backtrack() is called afterwards, the token will remain at the
1829   /// insertion point.
1830   /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1831   /// flag set. See the flag documentation for details.
EnterToken(const Token & Tok,bool IsReinject)1832   void EnterToken(const Token &Tok, bool IsReinject) {
1833     if (LexLevel) {
1834       // It's not correct in general to enter caching lex mode while in the
1835       // middle of a nested lexing action.
1836       auto TokCopy = std::make_unique<Token[]>(1);
1837       TokCopy[0] = Tok;
1838       EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1839     } else {
1840       EnterCachingLexMode();
1841       assert(IsReinject && "new tokens in the middle of cached stream");
1842       CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1843     }
1844   }
1845 
1846   /// We notify the Preprocessor that if it is caching tokens (because
1847   /// backtrack is enabled) it should replace the most recent cached tokens
1848   /// with the given annotation token. This function has no effect if
1849   /// backtracking is not enabled.
1850   ///
1851   /// Note that the use of this function is just for optimization, so that the
  /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1853   /// invoked.
AnnotateCachedTokens(const Token & Tok)1854   void AnnotateCachedTokens(const Token &Tok) {
1855     assert(Tok.isAnnotation() && "Expected annotation token");
1856     if (CachedLexPos != 0 && isBacktrackEnabled())
1857       AnnotatePreviousCachedTokens(Tok);
1858   }
1859 
1860   /// Get the location of the last cached token, suitable for setting the end
1861   /// location of an annotation token.
getLastCachedTokenLocation()1862   SourceLocation getLastCachedTokenLocation() const {
1863     assert(CachedLexPos != 0);
1864     return CachedTokens[CachedLexPos-1].getLastLoc();
1865   }
1866 
  /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
  /// CachedTokens.
  // NOTE(review): how "is the same token" is decided (location, kind, ...) is
  // not visible from this declaration -- confirm against the definition.
  bool IsPreviousCachedToken(const Token &Tok) const;

  /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
  /// in \p NewToks.
  ///
  /// Useful when a token needs to be split into smaller ones and the most
  /// recent token in CachedTokens must be updated to reflect that.
  void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1877 
1878   /// Replace the last token with an annotation token.
1879   ///
1880   /// Like AnnotateCachedTokens(), this routine replaces an
1881   /// already-parsed (and resolved) token with an annotation
1882   /// token. However, this routine only replaces the last token with
1883   /// the annotation token; it does not affect any other cached
1884   /// tokens. This function has no effect if backtracking is not
1885   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1886   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1887     assert(Tok.isAnnotation() && "Expected annotation token");
1888     if (CachedLexPos != 0 && isBacktrackEnabled())
1889       CachedTokens[CachedLexPos-1] = Tok;
1890   }
1891 
  /// Enter an annotation token into the token stream.
  ///
  /// \param Range presumably the source range the annotation token covers --
  /// TODO confirm against the definition.
  /// \param Kind the token kind of the annotation token.
  /// \param AnnotationVal opaque pointer; presumably stored as the token's
  /// annotation value -- TODO confirm.
  void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
                            void *AnnotationVal);
1895 
1896   /// Determine whether it's possible for a future call to Lex to produce an
1897   /// annotation token created by a previous call to EnterAnnotationToken.
mightHavePendingAnnotationTokens()1898   bool mightHavePendingAnnotationTokens() {
1899     return CurLexerCallback != CLK_Lexer;
1900   }
1901 
1902   /// Update the current token to represent the provided
1903   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1904   void TypoCorrectToken(const Token &Tok) {
1905     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1906     if (CachedLexPos != 0 && isBacktrackEnabled())
1907       CachedTokens[CachedLexPos-1] = Tok;
1908   }
1909 
  /// Recompute the current lexer kind based on the CurLexer/
  /// CurTokenLexer pointers.
  // NOTE(review): when callers must invoke this (e.g. after changing those
  // pointers directly) is not visible from this declaration -- confirm.
  void recomputeCurLexerKind();
1913 
1914   /// Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1915   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1916 
  /// Enables incremental processing, or disables it if \p value is false.
1918   void enableIncrementalProcessing(bool value = true) {
1919     IncrementalProcessing = value;
1920   }
1921 
  /// Specify the point at which code-completion will be performed.
  ///
  /// \param File the file in which code completion should occur. If
  /// this file is included multiple times, code-completion will
  /// perform completion the first time it is included. If NULL, this
  /// function clears out the code-completion point.
  // NOTE(review): \p File is declared as a FileEntryRef passed by value;
  // confirm whether a "NULL" file can still be expressed with that type or
  // whether the sentence above is stale.
  ///
  /// \param Line the line at which code completion should occur
  /// (1-based).
  ///
  /// \param Column the column at which code completion should occur
  /// (1-based).
  ///
  /// \returns true if an error occurred, false otherwise.
  bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
                              unsigned Column);
1938 
1939   /// Determine if we are performing code completion.
isCodeCompletionEnabled()1940   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1941 
1942   /// Returns the location of the code-completion point.
1943   ///
1944   /// Returns an invalid location if code-completion is not enabled or the file
1945   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()1946   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1947 
1948   /// Returns the start location of the file of code-completion point.
1949   ///
1950   /// Returns an invalid location if code-completion is not enabled or the file
1951   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()1952   SourceLocation getCodeCompletionFileLoc() const {
1953     return CodeCompletionFileLoc;
1954   }
1955 
1956   /// Returns true if code-completion is enabled and we have hit the
1957   /// code-completion point.
isCodeCompletionReached()1958   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1959 
1960   /// Note that we hit the code-completion point.
setCodeCompletionReached()1961   void setCodeCompletionReached() {
1962     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1963     CodeCompletionReached = true;
1964     // Silence any diagnostics that occur after we hit the code-completion.
1965     getDiagnostics().setSuppressAllDiagnostics(true);
1966   }
1967 
1968   /// The location of the currently-active \#pragma clang
1969   /// arc_cf_code_audited begin.
1970   ///
1971   /// Returns an invalid location if there is no such pragma active.
1972   std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo()1973   getPragmaARCCFCodeAuditedInfo() const {
1974     return PragmaARCCFCodeAuditedInfo;
1975   }
1976 
1977   /// Set the location of the currently-active \#pragma clang
1978   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1979   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1980                                      SourceLocation Loc) {
1981     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1982   }
1983 
1984   /// The location of the currently-active \#pragma clang
1985   /// assume_nonnull begin.
1986   ///
1987   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()1988   SourceLocation getPragmaAssumeNonNullLoc() const {
1989     return PragmaAssumeNonNullLoc;
1990   }
1991 
1992   /// Set the location of the currently-active \#pragma clang
1993   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)1994   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1995     PragmaAssumeNonNullLoc = Loc;
1996   }
1997 
1998   /// Get the location of the recorded unterminated \#pragma clang
1999   /// assume_nonnull begin in the preamble, if one exists.
2000   ///
  /// Returns an invalid location if the preamble did not end with
2002   /// such a pragma active or if there is no recorded preamble.
getPreambleRecordedPragmaAssumeNonNullLoc()2003   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2004     return PreambleRecordedPragmaAssumeNonNullLoc;
2005   }
2006 
2007   /// Record the location of the unterminated \#pragma clang
2008   /// assume_nonnull begin in the preamble.
setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2009   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2010     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2011   }
2012 
2013   /// Set the directory in which the main file should be considered
2014   /// to have been found, if it is not a real file.
setMainFileDir(DirectoryEntryRef Dir)2015   void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2016 
2017   /// Instruct the preprocessor to skip part of the main source file.
2018   ///
2019   /// \param Bytes The number of bytes in the preamble to skip.
2020   ///
2021   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2022   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2023   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2024     SkipMainFilePreamble.first = Bytes;
2025     SkipMainFilePreamble.second = StartOfLine;
2026   }
2027 
2028   /// Forwarding function for diagnostics.  This emits a diagnostic at
2029   /// the specified Token's location, translating the token's start
2030   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)2031   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2032     return Diags->Report(Loc, DiagID);
2033   }
2034 
Diag(const Token & Tok,unsigned DiagID)2035   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2036     return Diags->Report(Tok.getLocation(), DiagID);
2037   }
2038 
2039   /// Return the 'spelling' of the token at the given
2040   /// location; does not go up to the spelling location or down to the
2041   /// expansion location.
2042   ///
2043   /// \param buffer A buffer which will be used only if the token requires
2044   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
2045   /// \param invalid If non-null, will be set \c true if an error occurs.
2046   StringRef getSpelling(SourceLocation loc,
2047                         SmallVectorImpl<char> &buffer,
2048                         bool *invalid = nullptr) const {
2049     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2050   }
2051 
2052   /// Return the 'spelling' of the Tok token.
2053   ///
2054   /// The spelling of a token is the characters used to represent the token in
2055   /// the source file after trigraph expansion and escaped-newline folding.  In
2056   /// particular, this wants to get the true, uncanonicalized, spelling of
2057   /// things like digraphs, UCNs, etc.
2058   ///
2059   /// \param Invalid If non-null, will be set \c true if an error occurs.
2060   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2061     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2062   }
2063 
2064   /// Get the spelling of a token into a preallocated buffer, instead
2065   /// of as an std::string.
2066   ///
2067   /// The caller is required to allocate enough space for the token, which is
2068   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2069   /// actual result is returned.
2070   ///
2071   /// Note that this method may do two possible things: it may either fill in
2072   /// the buffer specified with characters, or it may *change the input pointer*
2073   /// to point to a constant buffer with the data already in it (avoiding a
2074   /// copy).  The caller is not allowed to modify the returned buffer pointer
2075   /// if an internal buffer is returned.
2076   unsigned getSpelling(const Token &Tok, const char *&Buffer,
2077                        bool *Invalid = nullptr) const {
2078     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2079   }
2080 
  /// Get the spelling of a token into a SmallVector.
  ///
  /// Note that the returned StringRef may not point to the
  /// supplied buffer if a copy can be avoided.
  ///
  /// \param Buffer scratch storage that may or may not end up holding the
  /// spelling (see the note above).
  /// \param Invalid optional (defaults to null); by analogy with the other
  /// getSpelling overloads, presumably set to \c true if an error occurs --
  /// TODO confirm.
  StringRef getSpelling(const Token &Tok,
                        SmallVectorImpl<char> &Buffer,
                        bool *Invalid = nullptr) const;
2088 
2089   /// Relex the token at the specified location.
2090   /// \returns true if there was a failure, false on success.
2091   bool getRawToken(SourceLocation Loc, Token &Result,
2092                    bool IgnoreWhiteSpace = false) {
2093     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2094   }
2095 
2096   /// Given a Token \p Tok that is a numeric constant with length 1,
2097   /// return the character.
2098   char
2099   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2100                                               bool *Invalid = nullptr) const {
2101     assert(Tok.is(tok::numeric_constant) &&
2102            Tok.getLength() == 1 && "Called on unsupported token");
2103     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2104 
2105     // If the token is carrying a literal data pointer, just use it.
2106     if (const char *D = Tok.getLiteralData())
2107       return *D;
2108 
2109     // Otherwise, fall back on getCharacterData, which is slower, but always
2110     // works.
2111     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
2112   }
2113 
2114   /// Retrieve the name of the immediate macro expansion.
2115   ///
2116   /// This routine starts from a source location, and finds the name of the
2117   /// macro responsible for its immediate expansion. It looks through any
2118   /// intervening macro argument expansions to compute this. It returns a
2119   /// StringRef that refers to the SourceManager-owned buffer of the source
2120   /// where that macro name is spelled. Thus, the result shouldn't out-live
2121   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)2122   StringRef getImmediateMacroName(SourceLocation Loc) {
2123     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
2124   }
2125 
  /// Plop the specified string into a scratch buffer and set the
  /// specified token's location and length to it.
  ///
  /// If specified, the source location provides a location of the expansion
  /// point of the token.
  ///
  /// \param Str the text to copy into the scratch buffer.
  /// \param Tok the token whose location and length are updated.
  /// \param ExpansionLocStart optional start of the expansion point; defaults
  /// to a default-constructed SourceLocation.
  /// \param ExpansionLocEnd optional end of the expansion point; defaults to
  /// a default-constructed SourceLocation.
  void CreateString(StringRef Str, Token &Tok,
                    SourceLocation ExpansionLocStart = SourceLocation(),
                    SourceLocation ExpansionLocEnd = SourceLocation());

  /// Split the first Length characters out of the token starting at TokLoc
  /// and return a location pointing to the split token. Re-lexing from the
  /// split token will return the split token rather than the original.
  ///
  /// \param TokLoc start location of the token to split.
  /// \param Length number of leading characters to split off.
  SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2139 
2140   /// Computes the source location just past the end of the
2141   /// token at this source location.
2142   ///
2143   /// This routine can be used to produce a source location that
2144   /// points just past the end of the token referenced by \p Loc, and
2145   /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
2147   /// the returned source location would not be meaningful (e.g., if
2148   /// it points into a macro), this routine returns an invalid
2149   /// source location.
2150   ///
2151   /// \param Offset an offset from the end of the token, where the source
2152   /// location should refer to. The default offset (0) produces a source
2153   /// location pointing just past the end of the token; an offset of 1 produces
2154   /// a source location pointing to the last character in the token, etc.
2155   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
2156     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2157   }
2158 
2159   /// Returns true if the given MacroID location points at the first
2160   /// token of the macro expansion.
2161   ///
2162   /// \param MacroBegin If non-null and function returns true, it is set to
2163   /// begin location of the macro.
2164   bool isAtStartOfMacroExpansion(SourceLocation loc,
2165                                  SourceLocation *MacroBegin = nullptr) const {
2166     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2167                                             MacroBegin);
2168   }
2169 
2170   /// Returns true if the given MacroID location points at the last
2171   /// token of the macro expansion.
2172   ///
2173   /// \param MacroEnd If non-null and function returns true, it is set to
2174   /// end location of the macro.
2175   bool isAtEndOfMacroExpansion(SourceLocation loc,
2176                                SourceLocation *MacroEnd = nullptr) const {
2177     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2178   }
2179 
  /// Print the token to stderr, used for debugging.
  ///
  /// \param DumpFlags defaults to false; presumably also prints the token's
  /// flags when true -- TODO confirm against the definition.
  void DumpToken(const Token &Tok, bool DumpFlags = false) const;
  // NOTE(review): the three helpers below are undocumented here; judging by
  // name only they are debugging dumps like DumpToken -- confirm where they
  // print to.
  void DumpLocation(SourceLocation Loc) const;
  void DumpMacro(const MacroInfo &MI) const;
  void dumpMacroInfo(const IdentifierInfo *II);
2185 
2186   /// Given a location that specifies the start of a
2187   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2188   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2189                                          unsigned Char) const {
2190     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2191   }
2192 
2193   /// Increment the counters for the number of token paste operations
2194   /// performed.
2195   ///
2196   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)2197   void IncrementPasteCounter(bool isFast) {
2198     if (isFast)
2199       ++NumFastTokenPaste;
2200     else
2201       ++NumTokenPaste;
2202   }
2203
       /// NOTE(review): presumably prints the statistics gathered by this
       /// preprocessor (e.g. the paste counters incremented above); the
       /// definition is not visible here -- confirm.
2204   void PrintStats();
2205
       /// NOTE(review): presumably reports the amount of memory used by this
       /// preprocessor; unit and scope are not visible here -- confirm.
2206   size_t getTotalMemory() const;
2207
2208   /// When the macro expander pastes together a comment (/##/) in Microsoft
2209   /// mode, this method handles updating the current state, returning the
2210   /// token on the next source line.
       ///
       /// \param Tok In/out token; per the description above it receives the
       /// token on the next source line.
2211   void HandleMicrosoftCommentPaste(Token &Tok);
2212 
2213   //===--------------------------------------------------------------------===//
2214   // Preprocessor callback methods.  These are invoked by a lexer as various
2215   // directives and events are found.
2216 
2217   /// Given a tok::raw_identifier token, look up the
2218   /// identifier information for the token and install it into the token,
2219   /// updating the token kind accordingly.
       ///
       /// \param Identifier The raw identifier token; it is updated in place.
       /// \returns NOTE(review): presumably the IdentifierInfo that was installed
       /// into \p Identifier -- confirm against the definition.
2220   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2221 
2222 private:
       /// Maps an identifier to an unsigned value.
       /// NOTE(review): presumably the diagnostic ID registered through
       /// SetPoisonReason -- confirm against the definition.
2223   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2224
2225 public:
2226   /// Specifies the reason for poisoning an identifier.
2227   ///
2228   /// If that identifier is accessed while poisoned, then this reason will be
2229   /// used instead of the default "poisoned" diagnostic.
       ///
       /// \param II The identifier the reason applies to.
       /// \param DiagID The diagnostic to use as the reason.
2230   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2231
2232   /// Display the reason for a poisoned identifier.
       ///
       /// \param Identifier The token naming the poisoned identifier.
2233   void HandlePoisonedIdentifier(Token & Identifier);
2234 
MaybeHandlePoisonedIdentifier(Token & Identifier)2235   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
2236     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
2237       if(II->isPoisoned()) {
2238         HandlePoisonedIdentifier(Identifier);
2239       }
2240     }
2241   }
2242 
2243 private:
2244   /// Identifiers used for SEH handling in Borland. These are only
2245   /// allowed in particular circumstances.
2246   // Allowed in an __except block.
2247   IdentifierInfo *Ident__exception_code,
2248                  *Ident___exception_code,
2249                  *Ident_GetExceptionCode;
2250   // Allowed in an __except filter expression.
2251   IdentifierInfo *Ident__exception_info,
2252                  *Ident___exception_info,
2253                  *Ident_GetExceptionInfo;
2254   // Allowed in a __finally block.
2255   IdentifierInfo *Ident__abnormal_termination,
2256                  *Ident___abnormal_termination,
2257                  *Ident_AbnormalTermination;
2258
       /// NOTE(review): presumably returns a pointer to the end of the buffer of
       /// the current lexer; the definition is not visible here -- confirm.
2259   const char *getCurLexerEndPos();
       /// NOTE(review): presumably emits a diagnostic for headers found in the
       /// umbrella directory of \p Mod that the module does not cover; confirm
       /// against the definition.
2260   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2261 
2262 public:
       /// Set the poisoned state of the SEH identifiers.
       /// NOTE(review): presumably affects the Ident__exception_code family of
       /// members of this class; \p Poison selects poisoning (true, the default)
       /// versus un-poisoning -- confirm against the definition.
2263   void PoisonSEHIdentifiers(bool Poison = true); // Borland
2264
2265   /// Callback invoked when the lexer reads an identifier and has
2266   /// filled in the token's IdentifierInfo member.
2267   ///
2268   /// This callback potentially macro expands it or turns it into a named
2269   /// token (like 'for').
2270   ///
2271   /// \returns true if we actually computed a token, false if we need to
2272   /// lex again.
2273   bool HandleIdentifier(Token &Identifier);
2274 
2275   /// Callback invoked when the lexer hits the end of the current file.
2276   ///
2277   /// This either stores the EOF token in \p Result and returns true, or
2278   /// pops a level off the include stack and returns false, at which point the
2279   /// client should call lex again.
       ///
       /// \param isEndOfMacro NOTE(review): presumably set when the call
       /// originates from the end of a macro token stream rather than a file --
       /// confirm against the definition.
2280   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2281
2282   /// Callback invoked when the current TokenLexer hits the end of its
2283   /// token stream.
       ///
       /// \returns NOTE(review): presumably follows the same convention as
       /// HandleEndOfFile (true when \p Result holds a token) -- confirm.
2284   bool HandleEndOfTokenLexer(Token &Result);
2285 
2286   /// Callback invoked when the lexer sees a # token at the start of a
2287   /// line.
2288   ///
2289   /// This consumes the directive, modifies the lexer/preprocessor state, and
2290   /// advances the lexer(s) so that the next token read is the correct one.
2291   void HandleDirective(Token &Result);
2292
2293   /// Ensure that the next token is a tok::eod token.
2294   ///
2295   /// If not, emit a diagnostic and consume up until the eod.
2296   /// If \p EnableMacros is true, then we consider macros that expand to zero
2297   /// tokens as being ok.
2298   ///
       /// \param DirType NOTE(review): presumably the directive name used in the
       /// diagnostic text -- confirm against the definition.
2299   /// \return The location of the end of the directive (the terminating
2300   /// newline).
2301   SourceLocation CheckEndOfDirective(const char *DirType,
2302                                      bool EnableMacros = false);
2303
2304   /// Read and discard all tokens remaining on the current line until
2305   /// the tok::eod token is found.
       ///
       /// \returns The range of the skipped tokens.
2306   SourceRange DiscardUntilEndOfDirective();
2307 
2308   /// Returns true if the preprocessor has seen a use of
2309   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()2310   bool SawDateOrTime() const {
2311     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
2312   }
getCounterValue()2313   unsigned getCounterValue() const { return CounterValue; }
setCounterValue(unsigned V)2314   void setCounterValue(unsigned V) { CounterValue = V; }
2315 
getCurrentFPEvalMethod()2316   LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2317     assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine &&
2318            "FPEvalMethod should be set either from command line or from the "
2319            "target info");
2320     return CurrentFPEvalMethod;
2321   }
2322 
getTUFPEvalMethod()2323   LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
2324     return TUFPEvalMethod;
2325   }
2326 
getLastFPEvalPragmaLocation()2327   SourceLocation getLastFPEvalPragmaLocation() const {
2328     return LastFPEvalPragmaLocation;
2329   }
2330 
setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2331   void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2332                               LangOptions::FPEvalMethodKind Val) {
2333     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2334            "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2335     // This is the location of the '#pragma float_control" where the
2336     // execution state is modifed.
2337     LastFPEvalPragmaLocation = PragmaLoc;
2338     CurrentFPEvalMethod = Val;
2339     TUFPEvalMethod = Val;
2340   }
2341 
setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2342   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2343     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2344            "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2345     TUFPEvalMethod = Val;
2346   }
2347 
2348   /// Retrieves the module that we're currently building, if any.
       ///
       /// \returns NOTE(review): presumably nullptr when no module is being
       /// built ("if any") -- confirm against the definition.
2349   Module *getCurrentModule();
2350
2351   /// Retrieves the module whose implementation we're currently compiling, if
       /// any.
       ///
       /// \returns NOTE(review): presumably nullptr when there is no such module
       /// -- confirm against the definition.
2352   Module *getCurrentModuleImplementation();
2353 
2354   /// If we are preprocessing a named module.
isInNamedModule()2355   bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2356 
2357   /// If we are proprocessing a named interface unit.
2358   /// Note that a module implementation partition is not considered as an
2359   /// named interface unit here although it is importable
2360   /// to ease the parsing.
isInNamedInterfaceUnit()2361   bool isInNamedInterfaceUnit() const {
2362     return ModuleDeclState.isNamedInterface();
2363   }
2364 
2365   /// Get the named module name we're preprocessing.
2366   /// Requires we're preprocessing a named module.
getNamedModuleName()2367   StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2368 
2369   /// If we are implementing an implementation module unit.
2370   /// Note that the module implementation partition is not considered as an
2371   /// implementation unit.
isInImplementationUnit()2372   bool isInImplementationUnit() const {
2373     return ModuleDeclState.isImplementationUnit();
2374   }
2375 
2376   /// If we're importing a standard C++20 Named Modules.
isInImportingCXXNamedModules()2377   bool isInImportingCXXNamedModules() const {
2378     // NamedModuleImportPath will be non-empty only if we're importing
2379     // Standard C++ named modules.
2380     return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules &&
2381            !IsAtImport;
2382   }
2383 
2384   /// Allocate a new MacroInfo object with the provided SourceLocation.
       ///
       /// \param L The source location handed to the new MacroInfo.
2385   MacroInfo *AllocateMacroInfo(SourceLocation L);
2386
2387   /// Turn the specified lexer token into a fully checked and spelled
2388   /// filename, e.g. as an operand of \#include.
2389   ///
2390   /// The caller is expected to provide a buffer that is large enough to hold
2391   /// the spelling of the filename, but is also expected to handle the case
2392   /// when this method decides to use a different buffer.
2393   ///
       /// \param Loc NOTE(review): presumably the location of the filename token,
       /// used for diagnostics -- confirm against the definition.
       /// \param Buffer In/out reference to the filename spelling.
2394   /// \returns true if the input filename was in <>'s or false if it was
2395   /// in ""'s.
2396   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2397 
2398   /// Given a "foo" or \<foo> reference, look up the indicated file.
2399   ///
2400   /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
2401   /// reference is for system \#include's or not (i.e. using <> instead of "").
       ///
       /// NOTE(review): the remaining parameters are not described here. The
       /// pointer parameters (\p CurDir, \p SearchPath, \p RelativePath,
       /// \p SuggestedModule, \p IsMapped, \p IsFrameworkFound) look like
       /// optional outputs, and \p FromDir / \p FromFile like the starting
       /// point of the search; confirm against the definition before relying
       /// on this.
2402   OptionalFileEntryRef
2403   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2404              ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2405              ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2406              SmallVectorImpl<char> *RelativePath,
2407              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2408              bool *IsFrameworkFound, bool SkipCache = false,
2409              bool OpenFile = true, bool CacheFailures = true);
2410 
2411   /// Return true if we're in the top-level file, not in a \#include.
2412   bool isInPrimaryFile() const;
2413
2414   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2415   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
       ///
       /// \param Result Receives the switch value that was lexed.
2416   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2417
       /// Validate a macro name token.
       /// NOTE(review): the parameters mirror ReadMacroName; presumably returns
       /// true when the name is invalid -- confirm against the definition.
2418   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2419                       bool *ShadowFlag = nullptr);
2420
       /// Enter / leave a submodule.
       /// NOTE(review): \p ForPragma presumably marks transitions triggered by a
       /// pragma rather than by an include; LeaveSubmodule presumably returns the
       /// module that was left -- confirm against the definitions.
2421   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2422   Module *LeaveSubmodule(bool ForPragma);
2423
2424 private:
       // TokenLexer::ExpandFunctionArguments is granted access to the private
       // members of this class.
2425   friend void TokenLexer::ExpandFunctionArguments();
2426 
PushIncludeMacroStack()2427   void PushIncludeMacroStack() {
2428     assert(CurLexerCallback != CLK_CachingLexer &&
2429            "cannot push a caching lexer");
2430     IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
2431                                    std::move(CurLexer), CurPPLexer,
2432                                    std::move(CurTokenLexer), CurDirLookup);
2433     CurPPLexer = nullptr;
2434   }
2435 
PopIncludeMacroStack()2436   void PopIncludeMacroStack() {
2437     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2438     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2439     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2440     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
2441     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2442     CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
2443     IncludeMacroStack.pop_back();
2444   }
2445
       /// NOTE(review): presumably carries the start-of-line / leading-space
       /// state over to \p Result; the definition is not visible here --
       /// confirm.
2446   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2447
2448   /// Determine whether we need to create module macros for #defines in the
2449   /// current context.
2450   bool needModuleMacros() const;
2451
2452   /// Update the set of active module macros and ambiguity flag for a module
2453   /// macro name.
       ///
       /// \param II The macro name.
       /// \param Info The per-macro state that is updated.
2454   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2455
       /// Allocation helpers for the three kinds of macro directive.
       /// NOTE(review): the allocator used and the lifetime of the returned
       /// objects are not visible in this chunk -- confirm against the
       /// definitions.
2456   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2457                                                SourceLocation Loc);
2458   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2459   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2460                                                              bool isPublic);
2461 
2462   /// Lex and validate a macro name, which occurs after a
2463   /// \#define or \#undef.
2464   ///
2465   /// \param MacroNameTok Token that represents the name defined or undefined.
2466   /// \param IsDefineUndef Kind of preprocessor directive.
2467   /// \param ShadowFlag Points to flag that is set if macro name shadows
2468   ///                   a keyword.
2469   ///
2470   /// This emits a diagnostic, sets the token kind to eod,
2471   /// and discards the rest of the macro line if the macro name is invalid.
2472   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2473                      bool *ShadowFlag = nullptr);
2474
2475   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2476   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2477   /// doing so performs certain validity checks including (but not limited to):
2478   ///   - # (stringization) is followed by a macro parameter
2479   /// \param MacroNameTok - Token that represents the macro name
2480   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2481   ///
2482   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2483   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2484   MacroInfo *ReadOptionalMacroParameterListAndBody(
2485       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2486
2487   /// The ( starting an argument list of a macro definition has just been read.
2488   /// Lex the rest of the parameters and the closing ), updating \p MI with
2489   /// what we learn and saving in \p LastTok the last token read.
2490   ///
       /// \returns true if an error occurs parsing the parameter list.
2491   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2492 
2493   /// Provide a suggestion for a typoed directive. If there is no typo, then
2494   /// just skip suggesting.
2495   ///
2496   /// \param Tok - Token that represents the directive
2497   /// \param Directive - String reference for the directive name
2498   void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2499
2500   /// We just read a \#if or related directive and decided that the
2501   /// subsequent tokens are in the \#if'd out portion of the
2502   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2503   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2504   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2505   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2506   /// already seen one so a \#else directive is a duplicate.  When this returns,
2507   /// the caller can lex the first valid token.
       ///
       /// NOTE(review): \p HashTokenLoc and \p IfTokenLoc presumably locate the
       /// '#' and the directive keyword of the conditional being skipped, and
       /// \p ElseLoc the \#else already seen (if any) -- confirm against the
       /// definition.
2508   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2509                                     SourceLocation IfTokenLoc,
2510                                     bool FoundNonSkipPortion, bool FoundElse,
2511                                     SourceLocation ElseLoc = SourceLocation());
2512 
2513   /// Information about the result of evaluating an expression for a
2514   /// preprocessor directive.
       ///
       /// Plain aggregate; none of the members has a default initializer, so
       /// every member must be set by whoever creates an instance.
2515   struct DirectiveEvalResult {
2516     /// Whether the expression was evaluated as true or not.
2517     bool Conditional;
2518
2519     /// True if the expression contained identifiers that were undefined.
2520     bool IncludedUndefinedIds;
2521
2522     /// The source range for the expression.
2523     SourceRange ExprRange;
2524   };
2525 
2526   /// Evaluate an integer constant expression that may occur after a
2527   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2528   ///
2529   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2530   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2531
2532   /// Process a '__has_include("path")' expression.
2533   ///
2534   /// Returns true if successful.
2535   bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2536
2537   /// Process a '__has_include_next("path")' expression.
2538   ///
2539   /// Returns true if successful.
2540   bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2541
2542   /// Get the directory and file from which to start \#include_next lookup.
       ///
       /// \param IncludeNextTok NOTE(review): presumably the include_next token,
       /// used for locations and diagnostics -- confirm against the definition.
2543   std::pair<ConstSearchDirIterator, const FileEntry *>
2544   getIncludeNextStart(const Token &IncludeNextTok) const;
2545
2546   /// Install the standard preprocessor pragmas:
2547   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2548   void RegisterBuiltinPragmas();
2549
2550   /// Register builtin macros such as __LINE__ with the identifier table.
2551   void RegisterBuiltinMacros();
2552 
2553   /// If an identifier token is read that is to be expanded as a macro, handle
2554   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
2555   /// otherwise the caller should lex again.
2556   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2557
2558   /// Cache macro expanded tokens for TokenLexers.
2559   ///
2560   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
2561   /// going to lex to the cache, and when it finishes the tokens are removed
2562   /// from the end of the cache.
2563   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2564                                   ArrayRef<Token> tokens);
2565
       /// Counterpart of cacheMacroExpandedTokens.
       /// NOTE(review): presumably drops the tokens cached for the most recent
       /// TokenLexer from the end of the cache -- confirm against the
       /// definition.
2566   void removeCachedMacroExpandedTokensOfLastLexer();
2567
2568   /// Determine whether the next preprocessor token to be
2569   /// lexed is a '('.  If so, consume the token and return true, if not, this
2570   /// method should have no observable side-effect on the lexed tokens.
2571   bool isNextPPTokenLParen();
2572
2573   /// After reading "MACRO(", this method is invoked to read all of the formal
2574   /// arguments specified for the macro invocation.  Returns null on error.
2575   MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2576                                        SourceLocation &MacroEnd);
2577
2578   /// If an identifier token is read that is to be expanded
2579   /// as a builtin macro, handle it and return the next token as 'Tok'.
2580   void ExpandBuiltinMacro(Token &Tok);
2581
2582   /// Read a \c _Pragma directive, slice it up, process it, then
2583   /// return the first token after the directive.
2584   /// This assumes that the \c _Pragma token has just been read into \p Tok.
2585   void Handle_Pragma(Token &Tok);
2586
2587   /// Like Handle_Pragma except the pragma text is not enclosed within
2588   /// a string literal.
2589   void HandleMicrosoft__pragma(Token &Tok);
2590
2591   /// Add a lexer to the top of the include stack and
2592   /// start lexing tokens from it instead of the current buffer.
2593   void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2594 
2595   /// Set the FileID for the preprocessor predefines.
setPredefinesFileID(FileID FID)2596   void setPredefinesFileID(FileID FID) {
2597     assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2598     PredefinesFileID = FID;
2599   }
2600 
2601   /// Set the FileID for the PCH through header.
       ///
       /// \param FID The FileID to record.
2602   void setPCHThroughHeaderFileID(FileID FID);
2603 
2604   /// Returns true if we are lexing from a file and not a
2605   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2606   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2607     return L ? !L->isPragmaLexer() : P != nullptr;
2608   }
2609 
IsFileLexer(const IncludeStackInfo & I)2610   static bool IsFileLexer(const IncludeStackInfo& I) {
2611     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2612   }
2613 
IsFileLexer()2614   bool IsFileLexer() const {
2615     return IsFileLexer(CurLexer.get(), CurPPLexer);
2616   }
2617 
2618   //===--------------------------------------------------------------------===//
2619   // Caching stuff.
       /// NOTE(review): presumably produces the next token into \p Result while
       /// in caching-lex mode (see InCachingLexMode); the definition is not
       /// visible here -- confirm.
2620   void CachingLex(Token &Result);
2621 
InCachingLexMode()2622   bool InCachingLexMode() const {
2623     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2624     // that we are past EOF, not that we are in CachingLex mode.
2625     return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2626   }
2627
       /// Switch the preprocessor into caching-lex mode.
       /// NOTE(review): the "Unchecked" variant presumably skips a precondition
       /// check that the first one performs; the definitions are not visible
       /// here -- confirm.
2628   void EnterCachingLexMode();
2629   void EnterCachingLexModeUnchecked();
2630 
ExitCachingLexMode()2631   void ExitCachingLexMode() {
2632     if (InCachingLexMode())
2633       RemoveTopOfLexerStack();
2634   }
2635
       /// NOTE(review): presumably returns the \p N-th upcoming token without
       /// consuming it; the exact indexing base is not visible here -- confirm.
2636   const Token &PeekAhead(unsigned N);
       /// NOTE(review): presumably replaces previously cached tokens with the
       /// annotation token \p Tok -- confirm against the definition.
2637   void AnnotatePreviousCachedTokens(const Token &Tok);
2638 
2639   //===--------------------------------------------------------------------===//
2640   /// Handle*Directive - implement the various preprocessor directives.  These
2641   /// should side-effect the current preprocessor object so that the next call
2642   /// to Lex() will return the appropriate token next.
2643   void HandleLineDirective();
2644   void HandleDigitDirective(Token &Tok);
       /// \param isWarning NOTE(review): presumably selects a warning-level
       /// diagnostic instead of an error-level one -- confirm against the
       /// definition.
2645   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2646   void HandleIdentSCCSDirective(Token &Tok);
2647   void HandleMacroPublicDirective(Token &Tok);
2648   void HandleMacroPrivateDirective();
2649 
2650   /// An additional notification that can be produced by a header inclusion or
2651   /// import to tell the parser what happened.
2652   struct ImportAction {
2653     enum ActionKind {
2654       None,
2655       ModuleBegin,
2656       ModuleImport,
2657       HeaderUnitImport,
2658       SkippedModuleImport,
2659       Failure,
2660     } Kind;
2661     Module *ModuleForHeader = nullptr;
2662 
2663     ImportAction(ActionKind AK, Module *Mod = nullptr)
KindImportAction2664         : Kind(AK), ModuleForHeader(Mod) {
2665       assert((AK == None || Mod || AK == Failure) &&
2666              "no module for module action");
2667     }
2668   };
2669
       /// Look up the file named by an include or import.
       /// NOTE(review): this reads like the lookup step shared by the include /
       /// import handlers below; the reference parameters (\p Filename,
       /// \p IsFrameworkFound, \p IsMapped, \p LookupFilename, \p RelativePath,
       /// \p SearchPath, \p SuggestedModule) presumably carry results back to
       /// the caller. The definition is not visible here -- confirm.
2670   OptionalFileEntryRef LookupHeaderIncludeOrImport(
2671       ConstSearchDirIterator *CurDir, StringRef &Filename,
2672       SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2673       const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2674       bool &IsMapped, ConstSearchDirIterator LookupFrom,
2675       const FileEntry *LookupFromFile, StringRef &LookupFilename,
2676       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2677       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2678
2679   // File inclusion.
       // NOTE(review): LookupFrom / LookupFromFile default to null; presumably
       // they are only supplied for include_next-style lookups -- confirm
       // against the definitions.
2680   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2681                               ConstSearchDirIterator LookupFrom = nullptr,
2682                               const FileEntry *LookupFromFile = nullptr);
2683   ImportAction
2684   HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2685                               Token &FilenameTok, SourceLocation EndLoc,
2686                               ConstSearchDirIterator LookupFrom = nullptr,
2687                               const FileEntry *LookupFromFile = nullptr);
2688   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2689   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2690   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2691   void HandleMicrosoftImportDirective(Token &Tok);
2692 
2693 public:
2694   /// Check that the given module is available, producing a diagnostic if not.
2695   /// \return \c true if the check failed (because the module is not available).
2696   ///         \c false if the module appears to be usable.
2697   static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2698                                      const TargetInfo &TargetInfo,
2699                                      const Module &M, DiagnosticsEngine &Diags);
2700
2701   // Module inclusion testing.
2702   /// Find the module that owns the source or header file that
2703   /// \p Loc points to. If the location is in a file that was included
2704   /// into a module, or is outside any module, returns nullptr.
       ///
       /// \param AllowTextual NOTE(review): presumably permits matching textual
       /// headers as well -- confirm against the definition.
2705   Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2706
2707   /// We want to produce a diagnostic at location IncLoc concerning an
2708   /// unreachable effect at location MLoc (e.g., where a desired entity was
2709   /// declared or defined). Determine whether the right way to make MLoc
2710   /// reachable is by #include, and if so, what header should be included.
2711   ///
2712   /// This is not necessarily fast, and might load unexpected module maps, so
2713   /// should only be called by code that intends to produce an error.
2714   ///
2715   /// \param IncLoc The location at which the missing effect was detected.
2716   /// \param MLoc A location within an unimported module at which the desired
2717   ///        effect occurred.
2718   /// \return A file that can be #included to provide the desired effect. Null
2719   ///         if no such file could be determined or if a #include is not
2720   ///         appropriate (e.g., if a module should be imported instead).
2721   OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2722                                                         SourceLocation MLoc);
2723 
isRecordingPreamble()2724   bool isRecordingPreamble() const {
2725     return PreambleConditionalStack.isRecording();
2726   }
2727 
hasRecordedPreamble()2728   bool hasRecordedPreamble() const {
2729     return PreambleConditionalStack.hasRecordedPreamble();
2730   }
2731 
getPreambleConditionalStack()2732   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2733       return PreambleConditionalStack.getStack();
2734   }
2735 
setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2736   void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2737     PreambleConditionalStack.setStack(s);
2738   }
2739 
setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,std::optional<PreambleSkipInfo> SkipInfo)2740   void setReplayablePreambleConditionalStack(
2741       ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
2742     PreambleConditionalStack.startReplaying();
2743     PreambleConditionalStack.setStack(s);
2744     PreambleConditionalStack.SkipInfo = SkipInfo;
2745   }
2746 
getPreambleSkipInfo()2747   std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2748     return PreambleConditionalStack.SkipInfo;
2749   }
2750 
2751 private:
2752   /// After processing the predefines file, initialize the conditional stack
2753   /// from the preamble.
2754   void replayPreambleConditionalStack();
2755
2756   // Macro handling.
       // NOTE(review): ImmediatelyAfterHeaderGuard presumably means the #define
       // directly follows an #ifndef-style header guard (compare
       // ReadOptionalMacroParameterListAndBody) -- confirm.
2757   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2758   void HandleUndefDirective();
2759
2760   // Conditional Inclusion.
       // NOTE(review): HashToken is presumably the '#' that introduced the
       // directive, and ReadAnyTokensBeforeDirective presumably reports whether
       // any tokens preceded it in the file -- confirm against the definitions.
2761   void HandleIfdefDirective(Token &Result, const Token &HashToken,
2762                             bool isIfndef, bool ReadAnyTokensBeforeDirective);
2763   void HandleIfDirective(Token &IfToken, const Token &HashToken,
2764                          bool ReadAnyTokensBeforeDirective);
2765   void HandleEndifDirective(Token &EndifToken);
2766   void HandleElseDirective(Token &Result, const Token &HashToken);
       // Kind selects which member of the #elif family is being handled.
2767   void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2768                                  tok::PPKeywordKind Kind);
2769
2770   // Pragmas.
2771   void HandlePragmaDirective(PragmaIntroducer Introducer);
2772 
2773 public:
       // Handlers for individual pragmas; the names indicate the pragma each one
       // implements. NOTE(review): the definitions are not visible in this
       // chunk, so the precise behavior of each handler should be checked
       // there.
2774   void HandlePragmaOnce(Token &OnceTok);
2775   void HandlePragmaMark(Token &MarkTok);
2776   void HandlePragmaPoison();
2777   void HandlePragmaSystemHeader(Token &SysHeaderTok);
2778   void HandlePragmaDependency(Token &DependencyTok);
2779   void HandlePragmaPushMacro(Token &Tok);
2780   void HandlePragmaPopMacro(Token &Tok);
2781   void HandlePragmaIncludeAlias(Token &Tok);
2782   void HandlePragmaModuleBuild(Token &Tok);
2783   void HandlePragmaHdrstop(Token &Tok);
       // NOTE(review): presumably the argument parser shared by the push_macro
       // and pop_macro handlers above; returns the macro's identifier --
       // confirm.
2784   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2785
2786   // Return true and store the first token only if any CommentHandler
2787   // has inserted some tokens and getCommentRetentionState() is false.
2788   bool HandleComment(Token &result, SourceRange Comment);
2789
2790   /// A macro is used; update information about macros that need unused
2791   /// warnings.
2792   void markMacroAsUsed(MacroInfo *MI);
2793 
addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2794   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2795                               SourceLocation AnnotationLoc) {
2796     auto Annotations = AnnotationInfos.find(II);
2797     if (Annotations == AnnotationInfos.end())
2798       AnnotationInfos.insert(std::make_pair(
2799           II,
2800           MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
2801     else
2802       Annotations->second.DeprecationInfo =
2803           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2804   }
2805 
addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2806   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2807                                SourceLocation AnnotationLoc) {
2808     auto Annotations = AnnotationInfos.find(II);
2809     if (Annotations == AnnotationInfos.end())
2810       AnnotationInfos.insert(
2811           std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
2812                                  AnnotationLoc, std::move(Msg))));
2813     else
2814       Annotations->second.RestrictExpansionInfo =
2815           MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
2816   }
2817 
addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2818   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
2819     auto Annotations = AnnotationInfos.find(II);
2820     if (Annotations == AnnotationInfos.end())
2821       AnnotationInfos.insert(
2822           std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
2823     else
2824       Annotations->second.FinalAnnotationLoc = AnnotationLoc;
2825   }
2826 
getMacroAnnotations(const IdentifierInfo * II)2827   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2828     return AnnotationInfos.find(II)->second;
2829   }
2830 
2831   void emitMacroExpansionWarnings(const Token &Identifier,
2832                                   bool IsIfnDef = false) const {
2833     IdentifierInfo *Info = Identifier.getIdentifierInfo();
2834     if (Info->isDeprecatedMacro())
2835       emitMacroDeprecationWarning(Identifier);
2836 
2837     if (Info->isRestrictExpansion() &&
2838         !SourceMgr.isInMainFile(Identifier.getLocation()))
2839       emitRestrictExpansionWarning(Identifier);
2840 
2841     if (!IsIfnDef) {
2842       if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2843         emitRestrictInfNaNWarning(Identifier, 0);
2844       if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs)
2845         emitRestrictInfNaNWarning(Identifier, 1);
2846     }
2847   }
2848 
  /// Static helper (declared here, defined out of line) that takes \p Path by
  /// mutable reference and returns nothing, so any result is written back
  /// into \p Path. \p LangOpts and \p TI are read-only inputs.
  /// NOTE(review): presumably prepares the path spelled by file-name macros
  /// such as __FILE__ - confirm the exact transformations at the definition.
  static void processPathForFileMacro(SmallVectorImpl<char> &Path,
                                      const LangOptions &LangOpts,
                                      const TargetInfo &TI);
2852 
  /// Static helper (declared here, defined out of line) that takes
  /// \p FileName by mutable reference and returns nothing, so any result is
  /// written into \p FileName. \p PLoc, \p LangOpts and \p TI are read-only
  /// inputs.
  /// NOTE(review): presumably derives the file name of the presumed location
  /// \p PLoc - confirm the exact behavior at the definition.
  static void processPathToFileName(SmallVectorImpl<char> &FileName,
                                    const PresumedLoc &PLoc,
                                    const LangOptions &LangOpts,
                                    const TargetInfo &TI);
2857 
2858 private:
  /// Called by emitMacroExpansionWarnings() when the identifier is flagged as
  /// a deprecated macro.
  void emitMacroDeprecationWarning(const Token &Identifier) const;
  /// Called by emitMacroExpansionWarnings() when the identifier is flagged as
  /// restrict-expansion and the use is outside the main file.
  void emitRestrictExpansionWarning(const Token &Identifier) const;
  /// NOTE(review): no caller is visible in this part of the header;
  /// presumably reports a redefinition (or, when \p IsUndef is true, an
  /// undefinition) of a macro marked final - confirm at the definition.
  void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
  /// Called by emitMacroExpansionWarnings() with \p DiagSelection 0 for
  /// INFINITY (when NoHonorInfs is set) and 1 for NAN (when NoHonorNaNs is
  /// set).
  void emitRestrictInfNaNWarning(const Token &Identifier,
                                 unsigned DiagSelection) const;
2864 
  /// This boolean state keeps track of whether the token currently being
  /// scanned (by this PP) is in a "-Wunsafe-buffer-usage" opt-out region.
  /// This assumes that PP scans a translation unit in linear order.
  bool InSafeBufferOptOutRegion = false;
2869 
  /// Holds the start location of the current "-Wunsafe-buffer-usage" opt-out
  /// region if PP is currently in such a region; holds an undefined value
  /// otherwise. It is used to report the start location of a never-closed
  /// region.
  SourceLocation CurrentSafeBufferOptOutStart;
2874 
  /// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
  /// translation unit. Each region is represented by a pair of start and end
  /// locations. A region is "open" if its start and end locations are
  /// identical.
  SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap;
2880 
2881 public:
2882   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
2883   /// region.  This `Loc` must be a source location that has been pre-processed.
2884   bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const;
2885 
  /// Alter the state of whether this PP currently is in a
  /// "-Wunsafe-buffer-usage" opt-out region.
  ///
  /// \param isEnter true if this PP is entering a region; false if it is
  /// exiting a region.
  /// \param Loc the location of the entry or exit of a region.
  /// \return true iff it is INVALID to enter or exit a region, i.e., on an
  /// attempt to enter a region before exiting the previous region, or to exit
  /// a region that PP is not currently in.
  bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
                                         const SourceLocation &Loc);
2898 
  /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
  /// opt-out region.
  bool isPPInSafeBufferOptOutRegion();
2902 
  /// Overload that additionally reports where the current region started.
  ///
  /// \param StartLoc output argument. It will be set to the start location of
  /// the current "-Wunsafe-buffer-usage" opt-out region iff this function
  /// returns true.
  /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
  /// opt-out region.
  bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
2909 
2910 private:
  /// Helper functions to forward lexing to the actual lexer. They all share the
  /// same signature: they take the preprocessor \p P and the token \p Result,
  /// and return a bool.
  ///
  /// CLK_Lexer forwards to P.CurLexer->Lex(Result) and returns its result.
  static bool CLK_Lexer(Preprocessor &P, Token &Result) {
    return P.CurLexer->Lex(Result);
  }
  /// Forwards to P.CurTokenLexer->Lex(Result) and returns its result.
  static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
    return P.CurTokenLexer->Lex(Result);
  }
  /// Forwards to P.CachingLex(Result). The forwarded call's outcome is not
  /// inspected here; this helper always returns true.
  static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
    P.CachingLex(Result);
    return true;
  }
  /// Forwards to P.CurLexer->LexDependencyDirectiveToken(Result) and returns
  /// its result.
  static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
    return P.CurLexer->LexDependencyDirectiveToken(Result);
  }
  /// Forwards to P.LexAfterModuleImport(Result) and returns its result.
  static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
    return P.LexAfterModuleImport(Result);
  }
2929 };
2930 
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  /// Virtual destructor (declared here, defined out of line) so that
  /// handlers can be destroyed through a CommentHandler pointer.
  virtual ~CommentHandler();

  /// Called with the source range \p Comment of a comment.
  ///
  /// \return The handler shall return true if it has pushed any tokens
  /// to be read using e.g. EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2941 
/// Abstract base class that describes a handler that will receive
/// source ranges for empty lines encountered in the source file.
class EmptylineHandler {
public:
  /// Virtual destructor (declared here, defined out of line) so that
  /// handlers can be destroyed through an EmptylineHandler pointer.
  virtual ~EmptylineHandler();

  /// The handler handles empty lines; \p Range is the source range of the
  /// empty line(s) being reported.
  virtual void HandleEmptyline(SourceRange Range) = 0;
};
2951 
/// Registry of pragma handlers added by plugins; an alias for the
/// llvm::Registry instantiation over PragmaHandler.
using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2954 
2955 } // namespace clang
2956 
2957 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
2958