//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::Preprocessor interface.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
#define LLVM_CLANG_LEX_PREPROCESSOR_H

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Registry.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace llvm { 59 60 template<unsigned InternalLen> class SmallString; 61 62 } // namespace llvm 63 64 namespace clang { 65 66 class CodeCompletionHandler; 67 class CommentHandler; 68 class DirectoryEntry; 69 class EmptylineHandler; 70 class ExternalPreprocessorSource; 71 class FileEntry; 72 class FileManager; 73 class HeaderSearch; 74 class MacroArgs; 75 class PragmaHandler; 76 class PragmaNamespace; 77 class PreprocessingRecord; 78 class PreprocessorLexer; 79 class PreprocessorOptions; 80 class ScratchBuffer; 81 class TargetInfo; 82 83 namespace Builtin { 84 class Context; 85 } 86 87 /// Stores token information for comparing actual tokens with 88 /// predefined values. Only handles simple tokens and identifiers. 89 class TokenValue { 90 tok::TokenKind Kind; 91 IdentifierInfo *II; 92 93 public: TokenValue(tok::TokenKind Kind)94 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 95 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 96 assert(Kind != tok::identifier && 97 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 98 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 99 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 100 } 101 TokenValue(IdentifierInfo * II)102 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 103 104 bool operator==(const Token &Tok) const { 105 return Tok.getKind() == Kind && 106 (!II || II == Tok.getIdentifierInfo()); 107 } 108 }; 109 110 /// Context in which macro name is used. 111 enum MacroUse { 112 // other than #define or #undef 113 MU_Other = 0, 114 115 // macro name specified in #define 116 MU_Define = 1, 117 118 // macro name specified in #undef 119 MU_Undef = 2 120 }; 121 122 /// Engages in a tight little dance with the lexer to efficiently 123 /// preprocess tokens. 
124 /// 125 /// Lexers know only about tokens within a single source file, and don't 126 /// know anything about preprocessor-level issues like the \#include stack, 127 /// token expansion, etc. 128 class Preprocessor { 129 friend class VAOptDefinitionContext; 130 friend class VariadicMacroScopeGuard; 131 132 llvm::unique_function<void(const clang::Token &)> OnToken; 133 std::shared_ptr<PreprocessorOptions> PPOpts; 134 DiagnosticsEngine *Diags; 135 const LangOptions &LangOpts; 136 const TargetInfo *Target = nullptr; 137 const TargetInfo *AuxTarget = nullptr; 138 FileManager &FileMgr; 139 SourceManager &SourceMgr; 140 std::unique_ptr<ScratchBuffer> ScratchBuf; 141 HeaderSearch &HeaderInfo; 142 ModuleLoader &TheModuleLoader; 143 144 /// External source of macros. 145 ExternalPreprocessorSource *ExternalSource; 146 147 /// A BumpPtrAllocator object used to quickly allocate and release 148 /// objects internal to the Preprocessor. 149 llvm::BumpPtrAllocator BP; 150 151 /// Identifiers for builtin macros and other builtins. 
152 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 153 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 154 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 155 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 156 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 157 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 158 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 159 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 160 IdentifierInfo *Ident__identifier; // __identifier 161 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 162 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 163 IdentifierInfo *Ident__has_feature; // __has_feature 164 IdentifierInfo *Ident__has_extension; // __has_extension 165 IdentifierInfo *Ident__has_builtin; // __has_builtin 166 IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin 167 IdentifierInfo *Ident__has_attribute; // __has_attribute 168 IdentifierInfo *Ident__has_include; // __has_include 169 IdentifierInfo *Ident__has_include_next; // __has_include_next 170 IdentifierInfo *Ident__has_warning; // __has_warning 171 IdentifierInfo *Ident__is_identifier; // __is_identifier 172 IdentifierInfo *Ident__building_module; // __building_module 173 IdentifierInfo *Ident__MODULE__; // __MODULE__ 174 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 175 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 176 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 177 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 178 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 179 IdentifierInfo *Ident__is_target_os; // __is_target_os 180 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 181 IdentifierInfo *Ident__is_target_variant_os; 182 IdentifierInfo *Ident__is_target_variant_environment; 183 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 184 185 // Weak, 
only valid (and set) while InMacroArgs is true. 186 Token* ArgMacro; 187 188 SourceLocation DATELoc, TIMELoc; 189 190 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 191 // not specified on the command line. The target is queried to set the 192 // default evaluation method. 193 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 194 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 195 196 // The most recent pragma location where the floating point evaluation 197 // method was modified. This is used to determine whether the 198 // 'pragma clang fp eval_method' was used whithin the current scope. 199 SourceLocation LastFPEvalPragmaLocation; 200 201 LangOptions::FPEvalMethodKind TUFPEvalMethod = 202 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 203 204 // Next __COUNTER__ value, starts at 0. 205 unsigned CounterValue = 0; 206 207 enum { 208 /// Maximum depth of \#includes. 209 MaxAllowedIncludeStackDepth = 200 210 }; 211 212 // State that is set before the preprocessor begins. 213 bool KeepComments : 1; 214 bool KeepMacroComments : 1; 215 bool SuppressIncludeNotFoundError : 1; 216 217 // State that changes while the preprocessor runs: 218 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 219 220 /// Whether the preprocessor owns the header search object. 221 bool OwnsHeaderSearch : 1; 222 223 /// True if macro expansion is disabled. 224 bool DisableMacroExpansion : 1; 225 226 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 227 /// when parsing preprocessor directives. 228 bool MacroExpansionInDirectivesOverride : 1; 229 230 class ResetMacroExpansionHelper; 231 232 /// Whether we have already loaded macros from the external source. 233 mutable bool ReadMacrosFromExternalSource : 1; 234 235 /// True if pragmas are enabled. 236 bool PragmasEnabled : 1; 237 238 /// True if the current build action is a preprocessing action. 
239 bool PreprocessedOutput : 1; 240 241 /// True if we are currently preprocessing a #if or #elif directive 242 bool ParsingIfOrElifDirective; 243 244 /// True if we are pre-expanding macro arguments. 245 bool InMacroArgPreExpansion; 246 247 /// Mapping/lookup information for all identifiers in 248 /// the program, including program keywords. 249 mutable IdentifierTable Identifiers; 250 251 /// This table contains all the selectors in the program. 252 /// 253 /// Unlike IdentifierTable above, this table *isn't* populated by the 254 /// preprocessor. It is declared/expanded here because its role/lifetime is 255 /// conceptually similar to the IdentifierTable. In addition, the current 256 /// control flow (in clang::ParseAST()), make it convenient to put here. 257 /// 258 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 259 /// the lifetime of the preprocessor. 260 SelectorTable Selectors; 261 262 /// Information about builtins. 263 std::unique_ptr<Builtin::Context> BuiltinInfo; 264 265 /// Tracks all of the pragmas that the client registered 266 /// with this preprocessor. 267 std::unique_ptr<PragmaNamespace> PragmaHandlers; 268 269 /// Pragma handlers of the original source is stored here during the 270 /// parsing of a model file. 271 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 272 273 /// Tracks all of the comment handlers that the client registered 274 /// with this preprocessor. 275 std::vector<CommentHandler *> CommentHandlers; 276 277 /// Empty line handler. 278 EmptylineHandler *Emptyline = nullptr; 279 280 /// True to avoid tearing down the lexer etc on EOF 281 bool IncrementalProcessing = false; 282 283 public: 284 /// The kind of translation unit we are processing. 285 const TranslationUnitKind TUKind; 286 287 private: 288 /// The code-completion handler. 289 CodeCompletionHandler *CodeComplete = nullptr; 290 291 /// The file that we're performing code-completion for, if any. 
292 const FileEntry *CodeCompletionFile = nullptr; 293 294 /// The offset in file for the code-completion point. 295 unsigned CodeCompletionOffset = 0; 296 297 /// The location for the code-completion point. This gets instantiated 298 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 299 SourceLocation CodeCompletionLoc; 300 301 /// The start location for the file of the code-completion point. 302 /// 303 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 304 /// for preprocessing. 305 SourceLocation CodeCompletionFileLoc; 306 307 /// The source location of the \c import contextual keyword we just 308 /// lexed, if any. 309 SourceLocation ModuleImportLoc; 310 311 /// The import path for named module that we're currently processing. 312 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath; 313 314 /// Whether the import is an `@import` or a standard c++ modules import. 315 bool IsAtImport = false; 316 317 /// Whether the last token we lexed was an '@'. 318 bool LastTokenWasAt = false; 319 320 /// A position within a C++20 import-seq. 321 class StdCXXImportSeq { 322 public: 323 enum State : int { 324 // Positive values represent a number of unclosed brackets. 325 AtTopLevel = 0, 326 AfterTopLevelTokenSeq = -1, 327 AfterExport = -2, 328 AfterImportSeq = -3, 329 }; 330 StdCXXImportSeq(State S)331 StdCXXImportSeq(State S) : S(S) {} 332 333 /// Saw any kind of open bracket. handleOpenBracket()334 void handleOpenBracket() { 335 S = static_cast<State>(std::max<int>(S, 0) + 1); 336 } 337 /// Saw any kind of close bracket other than '}'. handleCloseBracket()338 void handleCloseBracket() { 339 S = static_cast<State>(std::max<int>(S, 1) - 1); 340 } 341 /// Saw a close brace. handleCloseBrace()342 void handleCloseBrace() { 343 handleCloseBracket(); 344 if (S == AtTopLevel && !AfterHeaderName) 345 S = AfterTopLevelTokenSeq; 346 } 347 /// Saw a semicolon. 
handleSemi()348 void handleSemi() { 349 if (atTopLevel()) { 350 S = AfterTopLevelTokenSeq; 351 AfterHeaderName = false; 352 } 353 } 354 355 /// Saw an 'export' identifier. handleExport()356 void handleExport() { 357 if (S == AfterTopLevelTokenSeq) 358 S = AfterExport; 359 else if (S <= 0) 360 S = AtTopLevel; 361 } 362 /// Saw an 'import' identifier. handleImport()363 void handleImport() { 364 if (S == AfterTopLevelTokenSeq || S == AfterExport) 365 S = AfterImportSeq; 366 else if (S <= 0) 367 S = AtTopLevel; 368 } 369 370 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 371 /// until we reach a top-level semicolon. handleHeaderName()372 void handleHeaderName() { 373 if (S == AfterImportSeq) 374 AfterHeaderName = true; 375 handleMisc(); 376 } 377 378 /// Saw any other token. handleMisc()379 void handleMisc() { 380 if (S <= 0) 381 S = AtTopLevel; 382 } 383 atTopLevel()384 bool atTopLevel() { return S <= 0; } afterImportSeq()385 bool afterImportSeq() { return S == AfterImportSeq; } afterTopLevelSeq()386 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 387 388 private: 389 State S; 390 /// Whether we're in the pp-import-suffix following the header-name in a 391 /// pp-import. If so, a close-brace is not sufficient to end the 392 /// top-level-token-seq of an import-seq. 393 bool AfterHeaderName = false; 394 }; 395 396 /// Our current position within a C++20 import-seq. 397 StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq; 398 399 /// Track whether we are in a Global Module Fragment 400 class TrackGMF { 401 public: 402 enum GMFState : int { 403 GMFActive = 1, 404 MaybeGMF = 0, 405 BeforeGMFIntroducer = -1, 406 GMFAbsentOrEnded = -2, 407 }; 408 TrackGMF(GMFState S)409 TrackGMF(GMFState S) : S(S) {} 410 411 /// Saw a semicolon. handleSemi()412 void handleSemi() { 413 // If it is immediately after the first instance of the module keyword, 414 // then that introduces the GMF. 
415 if (S == MaybeGMF) 416 S = GMFActive; 417 } 418 419 /// Saw an 'export' identifier. handleExport()420 void handleExport() { 421 // The presence of an 'export' keyword always ends or excludes a GMF. 422 S = GMFAbsentOrEnded; 423 } 424 425 /// Saw an 'import' identifier. handleImport(bool AfterTopLevelTokenSeq)426 void handleImport(bool AfterTopLevelTokenSeq) { 427 // If we see this before any 'module' kw, then we have no GMF. 428 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 429 S = GMFAbsentOrEnded; 430 } 431 432 /// Saw a 'module' identifier. handleModule(bool AfterTopLevelTokenSeq)433 void handleModule(bool AfterTopLevelTokenSeq) { 434 // This was the first module identifier and not preceded by any token 435 // that would exclude a GMF. It could begin a GMF, but only if directly 436 // followed by a semicolon. 437 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 438 S = MaybeGMF; 439 else 440 S = GMFAbsentOrEnded; 441 } 442 443 /// Saw any other token. handleMisc()444 void handleMisc() { 445 // We saw something other than ; after the 'module' kw, so not a GMF. 446 if (S == MaybeGMF) 447 S = GMFAbsentOrEnded; 448 } 449 inGMF()450 bool inGMF() { return S == GMFActive; } 451 452 private: 453 /// Track the transitions into and out of a Global Module Fragment, 454 /// if one is present. 455 GMFState S; 456 }; 457 458 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 459 460 /// Track the status of the c++20 module decl. 461 /// 462 /// module-declaration: 463 /// 'export'[opt] 'module' module-name module-partition[opt] 464 /// attribute-specifier-seq[opt] ';' 465 /// 466 /// module-name: 467 /// module-name-qualifier[opt] identifier 468 /// 469 /// module-partition: 470 /// ':' module-name-qualifier[opt] identifier 471 /// 472 /// module-name-qualifier: 473 /// identifier '.' 474 /// module-name-qualifier identifier '.' 
475 /// 476 /// Transition state: 477 /// 478 /// NotAModuleDecl --- export ---> FoundExport 479 /// NotAModuleDecl --- module ---> ImplementationCandidate 480 /// FoundExport --- module ---> InterfaceCandidate 481 /// ImplementationCandidate --- Identifier ---> ImplementationCandidate 482 /// ImplementationCandidate --- period ---> ImplementationCandidate 483 /// ImplementationCandidate --- colon ---> ImplementationCandidate 484 /// InterfaceCandidate --- Identifier ---> InterfaceCandidate 485 /// InterfaceCandidate --- period ---> InterfaceCandidate 486 /// InterfaceCandidate --- colon ---> InterfaceCandidate 487 /// ImplementationCandidate --- Semi ---> NamedModuleImplementation 488 /// NamedModuleInterface --- Semi ---> NamedModuleInterface 489 /// NamedModuleImplementation --- Anything ---> NamedModuleImplementation 490 /// NamedModuleInterface --- Anything ---> NamedModuleInterface 491 /// 492 /// FIXME: We haven't handle attribute-specifier-seq here. It may not be bad 493 /// soon since we don't support any module attributes yet. 
494 class ModuleDeclSeq { 495 enum ModuleDeclState : int { 496 NotAModuleDecl, 497 FoundExport, 498 InterfaceCandidate, 499 ImplementationCandidate, 500 NamedModuleInterface, 501 NamedModuleImplementation, 502 }; 503 504 public: 505 ModuleDeclSeq() = default; 506 handleExport()507 void handleExport() { 508 if (State == NotAModuleDecl) 509 State = FoundExport; 510 else if (!isNamedModule()) 511 reset(); 512 } 513 handleModule()514 void handleModule() { 515 if (State == FoundExport) 516 State = InterfaceCandidate; 517 else if (State == NotAModuleDecl) 518 State = ImplementationCandidate; 519 else if (!isNamedModule()) 520 reset(); 521 } 522 handleIdentifier(IdentifierInfo * Identifier)523 void handleIdentifier(IdentifierInfo *Identifier) { 524 if (isModuleCandidate() && Identifier) 525 Name += Identifier->getName().str(); 526 else if (!isNamedModule()) 527 reset(); 528 } 529 handleColon()530 void handleColon() { 531 if (isModuleCandidate()) 532 Name += ":"; 533 else if (!isNamedModule()) 534 reset(); 535 } 536 handlePeriod()537 void handlePeriod() { 538 if (isModuleCandidate()) 539 Name += "."; 540 else if (!isNamedModule()) 541 reset(); 542 } 543 handleSemi()544 void handleSemi() { 545 if (!Name.empty() && isModuleCandidate()) { 546 if (State == InterfaceCandidate) 547 State = NamedModuleInterface; 548 else if (State == ImplementationCandidate) 549 State = NamedModuleImplementation; 550 else 551 llvm_unreachable("Unimaged ModuleDeclState."); 552 } else if (!isNamedModule()) 553 reset(); 554 } 555 handleMisc()556 void handleMisc() { 557 if (!isNamedModule()) 558 reset(); 559 } 560 isModuleCandidate()561 bool isModuleCandidate() const { 562 return State == InterfaceCandidate || State == ImplementationCandidate; 563 } 564 isNamedModule()565 bool isNamedModule() const { 566 return State == NamedModuleInterface || 567 State == NamedModuleImplementation; 568 } 569 isNamedInterface()570 bool isNamedInterface() const { return State == NamedModuleInterface; } 571 
isImplementationUnit()572 bool isImplementationUnit() const { 573 return State == NamedModuleImplementation && !getName().contains(':'); 574 } 575 getName()576 StringRef getName() const { 577 assert(isNamedModule() && "Can't get name from a non named module"); 578 return Name; 579 } 580 getPrimaryName()581 StringRef getPrimaryName() const { 582 assert(isNamedModule() && "Can't get name from a non named module"); 583 return getName().split(':').first; 584 } 585 reset()586 void reset() { 587 Name.clear(); 588 State = NotAModuleDecl; 589 } 590 591 private: 592 ModuleDeclState State = NotAModuleDecl; 593 std::string Name; 594 }; 595 596 ModuleDeclSeq ModuleDeclState; 597 598 /// Whether the module import expects an identifier next. Otherwise, 599 /// it expects a '.' or ';'. 600 bool ModuleImportExpectsIdentifier = false; 601 602 /// The identifier and source location of the currently-active 603 /// \#pragma clang arc_cf_code_audited begin. 604 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 605 606 /// The source location of the currently-active 607 /// \#pragma clang assume_nonnull begin. 608 SourceLocation PragmaAssumeNonNullLoc; 609 610 /// Set only for preambles which end with an active 611 /// \#pragma clang assume_nonnull begin. 612 /// 613 /// When the preamble is loaded into the main file, 614 /// `PragmaAssumeNonNullLoc` will be set to this to 615 /// replay the unterminated assume_nonnull. 616 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 617 618 /// True if we hit the code-completion point. 619 bool CodeCompletionReached = false; 620 621 /// The code completion token containing the information 622 /// on the stem that is to be code completed. 623 IdentifierInfo *CodeCompletionII = nullptr; 624 625 /// Range for the code completion token. 
626 SourceRange CodeCompletionTokenRange; 627 628 /// The directory that the main file should be considered to occupy, 629 /// if it does not correspond to a real file (as happens when building a 630 /// module). 631 OptionalDirectoryEntryRef MainFileDir; 632 633 /// The number of bytes that we will initially skip when entering the 634 /// main file, along with a flag that indicates whether skipping this number 635 /// of bytes will place the lexer at the start of a line. 636 /// 637 /// This is used when loading a precompiled preamble. 638 std::pair<int, bool> SkipMainFilePreamble; 639 640 /// Whether we hit an error due to reaching max allowed include depth. Allows 641 /// to avoid hitting the same error over and over again. 642 bool HasReachedMaxIncludeDepth = false; 643 644 /// The number of currently-active calls to Lex. 645 /// 646 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 647 /// require asking for multiple additional tokens. This counter makes it 648 /// possible for Lex to detect whether it's producing a token for the end 649 /// of phase 4 of translation or for some other situation. 650 unsigned LexLevel = 0; 651 652 /// The number of (LexLevel 0) preprocessor tokens. 653 unsigned TokenCount = 0; 654 655 /// Preprocess every token regardless of LexLevel. 656 bool PreprocessToken = false; 657 658 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 659 /// warning, or zero for unlimited. 
660 unsigned MaxTokens = 0; 661 SourceLocation MaxTokensOverrideLoc; 662 663 public: 664 struct PreambleSkipInfo { 665 SourceLocation HashTokenLoc; 666 SourceLocation IfTokenLoc; 667 bool FoundNonSkipPortion; 668 bool FoundElse; 669 SourceLocation ElseLoc; 670 PreambleSkipInfoPreambleSkipInfo671 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 672 bool FoundNonSkipPortion, bool FoundElse, 673 SourceLocation ElseLoc) 674 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 675 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 676 ElseLoc(ElseLoc) {} 677 }; 678 679 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 680 681 private: 682 friend class ASTReader; 683 friend class MacroArgs; 684 685 class PreambleConditionalStackStore { 686 enum State { 687 Off = 0, 688 Recording = 1, 689 Replaying = 2, 690 }; 691 692 public: 693 PreambleConditionalStackStore() = default; 694 startRecording()695 void startRecording() { ConditionalStackState = Recording; } startReplaying()696 void startReplaying() { ConditionalStackState = Replaying; } isRecording()697 bool isRecording() const { return ConditionalStackState == Recording; } isReplaying()698 bool isReplaying() const { return ConditionalStackState == Replaying; } 699 getStack()700 ArrayRef<PPConditionalInfo> getStack() const { 701 return ConditionalStack; 702 } 703 doneReplaying()704 void doneReplaying() { 705 ConditionalStack.clear(); 706 ConditionalStackState = Off; 707 } 708 setStack(ArrayRef<PPConditionalInfo> s)709 void setStack(ArrayRef<PPConditionalInfo> s) { 710 if (!isRecording() && !isReplaying()) 711 return; 712 ConditionalStack.clear(); 713 ConditionalStack.append(s.begin(), s.end()); 714 } 715 hasRecordedPreamble()716 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 717 reachedEOFWhileSkipping()718 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 719 clearSkipInfo()720 void clearSkipInfo() { SkipInfo.reset(); } 721 722 
std::optional<PreambleSkipInfo> SkipInfo; 723 724 private: 725 SmallVector<PPConditionalInfo, 4> ConditionalStack; 726 State ConditionalStackState = Off; 727 } PreambleConditionalStack; 728 729 /// The current top of the stack that we're lexing from if 730 /// not expanding a macro and we are lexing directly from source code. 731 /// 732 /// Only one of CurLexer, or CurTokenLexer will be non-null. 733 std::unique_ptr<Lexer> CurLexer; 734 735 /// The current top of the stack that we're lexing from 736 /// if not expanding a macro. 737 /// 738 /// This is an alias for CurLexer. 739 PreprocessorLexer *CurPPLexer = nullptr; 740 741 /// Used to find the current FileEntry, if CurLexer is non-null 742 /// and if applicable. 743 /// 744 /// This allows us to implement \#include_next and find directory-specific 745 /// properties. 746 ConstSearchDirIterator CurDirLookup = nullptr; 747 748 /// The current macro we are expanding, if we are expanding a macro. 749 /// 750 /// One of CurLexer and CurTokenLexer must be null. 751 std::unique_ptr<TokenLexer> CurTokenLexer; 752 753 /// The kind of lexer we're currently working with. 754 typedef bool (*LexerCallback)(Preprocessor &, Token &); 755 LexerCallback CurLexerCallback = &CLK_Lexer; 756 757 /// If the current lexer is for a submodule that is being built, this 758 /// is that submodule. 759 Module *CurLexerSubmodule = nullptr; 760 761 /// Keeps track of the stack of files currently 762 /// \#included, and macros currently being expanded from, not counting 763 /// CurLexer/CurTokenLexer. 764 struct IncludeStackInfo { 765 LexerCallback CurLexerCallback; 766 Module *TheSubmodule; 767 std::unique_ptr<Lexer> TheLexer; 768 PreprocessorLexer *ThePPLexer; 769 std::unique_ptr<TokenLexer> TheTokenLexer; 770 ConstSearchDirIterator TheDirLookup; 771 772 // The following constructors are completely useless copies of the default 773 // versions, only needed to pacify MSVC. 
IncludeStackInfoIncludeStackInfo774 IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, 775 std::unique_ptr<Lexer> &&TheLexer, 776 PreprocessorLexer *ThePPLexer, 777 std::unique_ptr<TokenLexer> &&TheTokenLexer, 778 ConstSearchDirIterator TheDirLookup) 779 : CurLexerCallback(std::move(CurLexerCallback)), 780 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 781 ThePPLexer(std::move(ThePPLexer)), 782 TheTokenLexer(std::move(TheTokenLexer)), 783 TheDirLookup(std::move(TheDirLookup)) {} 784 }; 785 std::vector<IncludeStackInfo> IncludeMacroStack; 786 787 /// Actions invoked when some preprocessor activity is 788 /// encountered (e.g. a file is \#included, etc). 789 std::unique_ptr<PPCallbacks> Callbacks; 790 791 struct MacroExpandsInfo { 792 Token Tok; 793 MacroDefinition MD; 794 SourceRange Range; 795 MacroExpandsInfoMacroExpandsInfo796 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 797 : Tok(Tok), MD(MD), Range(Range) {} 798 }; 799 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 800 801 /// Information about a name that has been used to define a module macro. 802 struct ModuleMacroInfo { 803 /// The most recent macro directive for this identifier. 804 MacroDirective *MD; 805 806 /// The active module macros for this identifier. 807 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 808 809 /// The generation number at which we last updated ActiveModuleMacros. 810 /// \see Preprocessor::VisibleModules. 811 unsigned ActiveModuleMacrosGeneration = 0; 812 813 /// Whether this macro name is ambiguous. 814 bool IsAmbiguous = false; 815 816 /// The module macros that are overridden by this macro. 817 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 818 ModuleMacroInfoModuleMacroInfo819 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 820 }; 821 822 /// The state of a macro for an identifier. 
823 class MacroState { 824 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 825 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)826 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 827 const IdentifierInfo *II) const { 828 if (II->isOutOfDate()) 829 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 830 // FIXME: Find a spare bit on IdentifierInfo and store a 831 // HasModuleMacros flag. 832 if (!II->hasMacroDefinition() || 833 (!PP.getLangOpts().Modules && 834 !PP.getLangOpts().ModulesLocalVisibility) || 835 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 836 return nullptr; 837 838 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 839 if (!Info) { 840 Info = new (PP.getPreprocessorAllocator()) 841 ModuleMacroInfo(State.get<MacroDirective *>()); 842 State = Info; 843 } 844 845 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 846 Info->ActiveModuleMacrosGeneration) 847 PP.updateModuleMacroInfo(II, *Info); 848 return Info; 849 } 850 851 public: MacroState()852 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)853 MacroState(MacroDirective *MD) : State(MD) {} 854 MacroState(MacroState && O)855 MacroState(MacroState &&O) noexcept : State(O.State) { 856 O.State = (MacroDirective *)nullptr; 857 } 858 859 MacroState &operator=(MacroState &&O) noexcept { 860 auto S = O.State; 861 O.State = (MacroDirective *)nullptr; 862 State = S; 863 return *this; 864 } 865 ~MacroState()866 ~MacroState() { 867 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 868 Info->~ModuleMacroInfo(); 869 } 870 getLatest()871 MacroDirective *getLatest() const { 872 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 873 return Info->MD; 874 return State.get<MacroDirective*>(); 875 } 876 setLatest(MacroDirective * MD)877 void setLatest(MacroDirective *MD) { 878 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 879 Info->MD = MD; 880 else 881 State = MD; 882 } 883 isAmbiguous(Preprocessor & PP,const IdentifierInfo * 
II)884 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 885 auto *Info = getModuleInfo(PP, II); 886 return Info ? Info->IsAmbiguous : false; 887 } 888 889 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)890 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 891 if (auto *Info = getModuleInfo(PP, II)) 892 return Info->ActiveModuleMacros; 893 return std::nullopt; 894 } 895 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)896 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 897 SourceManager &SourceMgr) const { 898 // FIXME: Incorporate module macros into the result of this. 899 if (auto *Latest = getLatest()) 900 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 901 return {}; 902 } 903 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)904 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 905 if (auto *Info = getModuleInfo(PP, II)) { 906 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 907 Info->ActiveModuleMacros.begin(), 908 Info->ActiveModuleMacros.end()); 909 Info->ActiveModuleMacros.clear(); 910 Info->IsAmbiguous = false; 911 } 912 } 913 getOverriddenMacros()914 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 915 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 916 return Info->OverriddenMacros; 917 return std::nullopt; 918 } 919 setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)920 void setOverriddenMacros(Preprocessor &PP, 921 ArrayRef<ModuleMacro *> Overrides) { 922 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 923 if (!Info) { 924 if (Overrides.empty()) 925 return; 926 Info = new (PP.getPreprocessorAllocator()) 927 ModuleMacroInfo(State.get<MacroDirective *>()); 928 State = Info; 929 } 930 Info->OverriddenMacros.clear(); 931 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 932 Overrides.begin(), Overrides.end()); 933 Info->ActiveModuleMacrosGeneration = 
0; 934 } 935 }; 936 937 /// For each IdentifierInfo that was associated with a macro, we 938 /// keep a mapping to the history of all macro definitions and #undefs in 939 /// the reverse order (the latest one is in the head of the list). 940 /// 941 /// This mapping lives within the \p CurSubmoduleState. 942 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 943 944 struct SubmoduleState; 945 946 /// Information about a submodule that we're currently building. 947 struct BuildingSubmoduleInfo { 948 /// The module that we are building. 949 Module *M; 950 951 /// The location at which the module was included. 952 SourceLocation ImportLoc; 953 954 /// Whether we entered this submodule via a pragma. 955 bool IsPragma; 956 957 /// The previous SubmoduleState. 958 SubmoduleState *OuterSubmoduleState; 959 960 /// The number of pending module macro names when we started building this. 961 unsigned OuterPendingModuleMacroNames; 962 BuildingSubmoduleInfoBuildingSubmoduleInfo963 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 964 SubmoduleState *OuterSubmoduleState, 965 unsigned OuterPendingModuleMacroNames) 966 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 967 OuterSubmoduleState(OuterSubmoduleState), 968 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 969 }; 970 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 971 972 /// Information about a submodule's preprocessor state. 973 struct SubmoduleState { 974 /// The macros for the submodule. 975 MacroMap Macros; 976 977 /// The set of modules that are visible within the submodule. 978 VisibleModuleSet VisibleModules; 979 980 // FIXME: CounterValue? 981 // FIXME: PragmaPushMacroInfo? 982 }; 983 std::map<Module *, SubmoduleState> Submodules; 984 985 /// The preprocessor state for preprocessing outside of any submodule. 986 SubmoduleState NullSubmoduleState; 987 988 /// The current submodule state. 
Will be \p NullSubmoduleState if we're not 989 /// in a submodule. 990 SubmoduleState *CurSubmoduleState; 991 992 /// The files that have been included. 993 IncludedFilesSet IncludedFiles; 994 995 /// The set of top-level modules that affected preprocessing, but were not 996 /// imported. 997 llvm::SmallSetVector<Module *, 2> AffectingClangModules; 998 999 /// The set of known macros exported from modules. 1000 llvm::FoldingSet<ModuleMacro> ModuleMacros; 1001 1002 /// The names of potential module macros that we've not yet processed. 1003 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames; 1004 1005 /// The list of module macros, for each identifier, that are not overridden by 1006 /// any other module macro. 1007 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 1008 LeafModuleMacros; 1009 1010 /// Macros that we want to warn because they are not used at the end 1011 /// of the translation unit. 1012 /// 1013 /// We store just their SourceLocations instead of 1014 /// something like MacroInfo*. The benefit of this is that when we are 1015 /// deserializing from PCH, we don't need to deserialize identifier & macros 1016 /// just so that we can report that they are unused, we just warn using 1017 /// the SourceLocations of this set (that will be filled by the ASTReader). 1018 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 1019 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 1020 1021 /// This is a pair of an optional message and source location used for pragmas 1022 /// that annotate macros like pragma clang restrict_expansion and pragma clang 1023 /// deprecated. This pair stores the optional message and the location of the 1024 /// annotation pragma for use producing diagnostics and notes. 
1025 using MsgLocationPair = std::pair<std::string, SourceLocation>; 1026 1027 struct MacroAnnotationInfo { 1028 SourceLocation Location; 1029 std::string Message; 1030 }; 1031 1032 struct MacroAnnotations { 1033 std::optional<MacroAnnotationInfo> DeprecationInfo; 1034 std::optional<MacroAnnotationInfo> RestrictExpansionInfo; 1035 std::optional<SourceLocation> FinalAnnotationLoc; 1036 makeDeprecationMacroAnnotations1037 static MacroAnnotations makeDeprecation(SourceLocation Loc, 1038 std::string Msg) { 1039 return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)}, 1040 std::nullopt, std::nullopt}; 1041 } 1042 makeRestrictExpansionMacroAnnotations1043 static MacroAnnotations makeRestrictExpansion(SourceLocation Loc, 1044 std::string Msg) { 1045 return MacroAnnotations{ 1046 std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt}; 1047 } 1048 makeFinalMacroAnnotations1049 static MacroAnnotations makeFinal(SourceLocation Loc) { 1050 return MacroAnnotations{std::nullopt, std::nullopt, Loc}; 1051 } 1052 }; 1053 1054 /// Warning information for macro annotations. 1055 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 1056 1057 /// A "freelist" of MacroArg objects that can be 1058 /// reused for quick allocation. 1059 MacroArgs *MacroArgCache = nullptr; 1060 1061 /// For each IdentifierInfo used in a \#pragma push_macro directive, 1062 /// we keep a MacroInfo stack used to restore the previous macro value. 1063 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 1064 PragmaPushMacroInfo; 1065 1066 // Various statistics we track for performance analysis. 
1067 unsigned NumDirectives = 0; 1068 unsigned NumDefined = 0; 1069 unsigned NumUndefined = 0; 1070 unsigned NumPragma = 0; 1071 unsigned NumIf = 0; 1072 unsigned NumElse = 0; 1073 unsigned NumEndif = 0; 1074 unsigned NumEnteredSourceFiles = 0; 1075 unsigned MaxIncludeStackDepth = 0; 1076 unsigned NumMacroExpanded = 0; 1077 unsigned NumFnMacroExpanded = 0; 1078 unsigned NumBuiltinMacroExpanded = 0; 1079 unsigned NumFastMacroExpanded = 0; 1080 unsigned NumTokenPaste = 0; 1081 unsigned NumFastTokenPaste = 0; 1082 unsigned NumSkipped = 0; 1083 1084 /// The predefined macros that preprocessor should use from the 1085 /// command line etc. 1086 std::string Predefines; 1087 1088 /// The file ID for the preprocessor predefines. 1089 FileID PredefinesFileID; 1090 1091 /// The file ID for the PCH through header. 1092 FileID PCHThroughHeaderFileID; 1093 1094 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 1095 bool SkippingUntilPragmaHdrStop = false; 1096 1097 /// Whether tokens are being skipped until the through header is seen. 1098 bool SkippingUntilPCHThroughHeader = false; 1099 1100 /// \{ 1101 /// Cache of macro expanders to reduce malloc traffic. 1102 enum { TokenLexerCacheSize = 8 }; 1103 unsigned NumCachedTokenLexers; 1104 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 1105 /// \} 1106 1107 /// Keeps macro expanded tokens for TokenLexers. 1108 // 1109 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 1110 /// going to lex in the cache and when it finishes the tokens are removed 1111 /// from the end of the cache. 1112 SmallVector<Token, 16> MacroExpandedTokens; 1113 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 1114 1115 /// A record of the macro definitions and expansions that 1116 /// occurred during preprocessing. 1117 /// 1118 /// This is an optional side structure that can be enabled with 1119 /// \c createPreprocessingRecord() prior to preprocessing. 
1120 PreprocessingRecord *Record = nullptr; 1121 1122 /// Cached tokens state. 1123 using CachedTokensTy = SmallVector<Token, 1>; 1124 1125 /// Cached tokens are stored here when we do backtracking or 1126 /// lookahead. They are "lexed" by the CachingLex() method. 1127 CachedTokensTy CachedTokens; 1128 1129 /// The position of the cached token that CachingLex() should 1130 /// "lex" next. 1131 /// 1132 /// If it points beyond the CachedTokens vector, it means that a normal 1133 /// Lex() should be invoked. 1134 CachedTokensTy::size_type CachedLexPos = 0; 1135 1136 /// Stack of backtrack positions, allowing nested backtracks. 1137 /// 1138 /// The EnableBacktrackAtThisPos() method pushes a position to 1139 /// indicate where CachedLexPos should be set when the BackTrack() method is 1140 /// invoked (at which point the last position is popped). 1141 std::vector<CachedTokensTy::size_type> BacktrackPositions; 1142 1143 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. 1144 /// This is used to guard against calling this function recursively. 1145 /// 1146 /// See comments at the use-site for more context about why it is needed. 1147 bool SkippingExcludedConditionalBlock = false; 1148 1149 /// Keeps track of skipped range mappings that were recorded while skipping 1150 /// excluded conditional directives. It maps the source buffer pointer at 1151 /// the beginning of a skipped block, to the number of bytes that should be 1152 /// skipped. 
1153 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges; 1154 1155 void updateOutOfDateIdentifier(IdentifierInfo &II) const; 1156 1157 public: 1158 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 1159 DiagnosticsEngine &diags, const LangOptions &LangOpts, 1160 SourceManager &SM, HeaderSearch &Headers, 1161 ModuleLoader &TheModuleLoader, 1162 IdentifierInfoLookup *IILookup = nullptr, 1163 bool OwnsHeaderSearch = false, 1164 TranslationUnitKind TUKind = TU_Complete); 1165 1166 ~Preprocessor(); 1167 1168 /// Initialize the preprocessor using information about the target. 1169 /// 1170 /// \param Target is owned by the caller and must remain valid for the 1171 /// lifetime of the preprocessor. 1172 /// \param AuxTarget is owned by the caller and must remain valid for 1173 /// the lifetime of the preprocessor. 1174 void Initialize(const TargetInfo &Target, 1175 const TargetInfo *AuxTarget = nullptr); 1176 1177 /// Initialize the preprocessor to parse a model file 1178 /// 1179 /// To parse model files the preprocessor of the original source is reused to 1180 /// preserver the identifier table. However to avoid some duplicate 1181 /// information in the preprocessor some cleanup is needed before it is used 1182 /// to parse model files. This method does that cleanup. 1183 void InitializeForModelFile(); 1184 1185 /// Cleanup after model file parsing 1186 void FinalizeForModelFile(); 1187 1188 /// Retrieve the preprocessor options used to initialize this 1189 /// preprocessor. 
getPreprocessorOpts()1190 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 1191 getDiagnostics()1192 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)1193 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1194 getLangOpts()1195 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()1196 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()1197 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()1198 FileManager &getFileManager() const { return FileMgr; } getSourceManager()1199 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()1200 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1201 getIdentifierTable()1202 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()1203 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()1204 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()1205 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } getPreprocessorAllocator()1206 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1207 setExternalSource(ExternalPreprocessorSource * Source)1208 void setExternalSource(ExternalPreprocessorSource *Source) { 1209 ExternalSource = Source; 1210 } 1211 getExternalSource()1212 ExternalPreprocessorSource *getExternalSource() const { 1213 return ExternalSource; 1214 } 1215 1216 /// Retrieve the module loader associated with this preprocessor. getModuleLoader()1217 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1218 hadModuleLoaderFatalFailure()1219 bool hadModuleLoaderFatalFailure() const { 1220 return TheModuleLoader.HadFatalFailure; 1221 } 1222 1223 /// Retrieve the number of Directives that have been processed by the 1224 /// Preprocessor. 
getNumDirectives()1225 unsigned getNumDirectives() const { 1226 return NumDirectives; 1227 } 1228 1229 /// True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()1230 bool isParsingIfOrElifDirective() const { 1231 return ParsingIfOrElifDirective; 1232 } 1233 1234 /// Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)1235 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1236 this->KeepComments = KeepComments | KeepMacroComments; 1237 this->KeepMacroComments = KeepMacroComments; 1238 } 1239 getCommentRetentionState()1240 bool getCommentRetentionState() const { return KeepComments; } 1241 setPragmasEnabled(bool Enabled)1242 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()1243 bool getPragmasEnabled() const { return PragmasEnabled; } 1244 SetSuppressIncludeNotFoundError(bool Suppress)1245 void SetSuppressIncludeNotFoundError(bool Suppress) { 1246 SuppressIncludeNotFoundError = Suppress; 1247 } 1248 GetSuppressIncludeNotFoundError()1249 bool GetSuppressIncludeNotFoundError() { 1250 return SuppressIncludeNotFoundError; 1251 } 1252 1253 /// Sets whether the preprocessor is responsible for producing output or if 1254 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)1255 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1256 PreprocessedOutput = IsPreprocessedOutput; 1257 } 1258 1259 /// Returns true if the preprocessor is responsible for generating output, 1260 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()1261 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1262 1263 /// Return true if we are lexing directly from the specified lexer. 
isCurrentLexer(const PreprocessorLexer * L)1264 bool isCurrentLexer(const PreprocessorLexer *L) const { 1265 return CurPPLexer == L; 1266 } 1267 1268 /// Return the current lexer being lexed from. 1269 /// 1270 /// Note that this ignores any potentially active macro expansions and _Pragma 1271 /// expansions going on at the time. getCurrentLexer()1272 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1273 1274 /// Return the current file lexer being lexed from. 1275 /// 1276 /// Note that this ignores any potentially active macro expansions and _Pragma 1277 /// expansions going on at the time. 1278 PreprocessorLexer *getCurrentFileLexer() const; 1279 1280 /// Return the submodule owning the file being lexed. This may not be 1281 /// the current module if we have changed modules since entering the file. getCurrentLexerSubmodule()1282 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1283 1284 /// Returns the FileID for the preprocessor predefines. getPredefinesFileID()1285 FileID getPredefinesFileID() const { return PredefinesFileID; } 1286 1287 /// \{ 1288 /// Accessors for preprocessor callbacks. 1289 /// 1290 /// Note that this class takes ownership of any PPCallbacks object given to 1291 /// it. getPPCallbacks()1292 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)1293 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1294 if (Callbacks) 1295 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1296 std::move(Callbacks)); 1297 Callbacks = std::move(C); 1298 } 1299 /// \} 1300 1301 /// Get the number of tokens processed so far. getTokenCount()1302 unsigned getTokenCount() const { return TokenCount; } 1303 1304 /// Get the max number of tokens before issuing a -Wmax-tokens warning. 
getMaxTokens()1305 unsigned getMaxTokens() const { return MaxTokens; } 1306 overrideMaxTokens(unsigned Value,SourceLocation Loc)1307 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1308 MaxTokens = Value; 1309 MaxTokensOverrideLoc = Loc; 1310 }; 1311 getMaxTokensOverrideLoc()1312 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1313 1314 /// Register a function that would be called on each token in the final 1315 /// expanded token stream. 1316 /// This also reports annotation tokens produced by the parser. setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1317 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1318 OnToken = std::move(F); 1319 } 1320 setPreprocessToken(bool Preprocess)1321 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1322 isMacroDefined(StringRef Id)1323 bool isMacroDefined(StringRef Id) { 1324 return isMacroDefined(&Identifiers.get(Id)); 1325 } isMacroDefined(const IdentifierInfo * II)1326 bool isMacroDefined(const IdentifierInfo *II) { 1327 return II->hasMacroDefinition() && 1328 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1329 } 1330 1331 /// Determine whether II is defined as a macro within the module M, 1332 /// if that is a module that we've already preprocessed. Does not check for 1333 /// macros imported into M. 
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1334 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1335 if (!II->hasMacroDefinition()) 1336 return false; 1337 auto I = Submodules.find(M); 1338 if (I == Submodules.end()) 1339 return false; 1340 auto J = I->second.Macros.find(II); 1341 if (J == I->second.Macros.end()) 1342 return false; 1343 auto *MD = J->second.getLatest(); 1344 return MD && MD->isDefined(); 1345 } 1346 getMacroDefinition(const IdentifierInfo * II)1347 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1348 if (!II->hasMacroDefinition()) 1349 return {}; 1350 1351 MacroState &S = CurSubmoduleState->Macros[II]; 1352 auto *MD = S.getLatest(); 1353 while (MD && isa<VisibilityMacroDirective>(MD)) 1354 MD = MD->getPrevious(); 1355 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1356 S.getActiveModuleMacros(*this, II), 1357 S.isAmbiguous(*this, II)); 1358 } 1359 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1360 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1361 SourceLocation Loc) { 1362 if (!II->hadMacroDefinition()) 1363 return {}; 1364 1365 MacroState &S = CurSubmoduleState->Macros[II]; 1366 MacroDirective::DefInfo DI; 1367 if (auto *MD = S.getLatest()) 1368 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1369 // FIXME: Compute the set of active module macros at the specified location. 1370 return MacroDefinition(DI.getDirective(), 1371 S.getActiveModuleMacros(*this, II), 1372 S.isAmbiguous(*this, II)); 1373 } 1374 1375 /// Given an identifier, return its latest non-imported MacroDirective 1376 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
getLocalMacroDirective(const IdentifierInfo * II)1377 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1378 if (!II->hasMacroDefinition()) 1379 return nullptr; 1380 1381 auto *MD = getLocalMacroDirectiveHistory(II); 1382 if (!MD || MD->getDefinition().isUndefined()) 1383 return nullptr; 1384 1385 return MD; 1386 } 1387 getMacroInfo(const IdentifierInfo * II)1388 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1389 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1390 } 1391 getMacroInfo(const IdentifierInfo * II)1392 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1393 if (!II->hasMacroDefinition()) 1394 return nullptr; 1395 if (auto MD = getMacroDefinition(II)) 1396 return MD.getMacroInfo(); 1397 return nullptr; 1398 } 1399 1400 /// Given an identifier, return the latest non-imported macro 1401 /// directive for that identifier. 1402 /// 1403 /// One can iterate over all previous macro directives from the most recent 1404 /// one. 1405 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1406 1407 /// Add a directive to the macro directive history for this identifier. 1408 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1409 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1410 SourceLocation Loc) { 1411 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1412 appendMacroDirective(II, MD); 1413 return MD; 1414 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1415 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1416 MacroInfo *MI) { 1417 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1418 } 1419 1420 /// Set a MacroDirective that was loaded from a PCH file. 
1421 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1422 MacroDirective *MD); 1423 1424 /// Register an exported macro for a module and identifier. 1425 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 1426 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1427 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1428 1429 /// Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)1430 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1431 if (II->isOutOfDate()) 1432 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 1433 auto I = LeafModuleMacros.find(II); 1434 if (I != LeafModuleMacros.end()) 1435 return I->second; 1436 return std::nullopt; 1437 } 1438 1439 /// Get the list of submodules that we're currently building. getBuildingSubmodules()1440 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1441 return BuildingSubmoduleStack; 1442 } 1443 1444 /// \{ 1445 /// Iterators for the macro history table. Currently defined macros have 1446 /// IdentifierInfo::hasMacroDefinition() set and an empty 1447 /// MacroInfo::getUndefLoc() at the head of the list. 1448 using macro_iterator = MacroMap::const_iterator; 1449 1450 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1451 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1452 1453 llvm::iterator_range<macro_iterator> 1454 macros(bool IncludeExternalMacros = true) const { 1455 macro_iterator begin = macro_begin(IncludeExternalMacros); 1456 macro_iterator end = macro_end(IncludeExternalMacros); 1457 return llvm::make_range(begin, end); 1458 } 1459 1460 /// \} 1461 1462 /// Mark the given clang module as affecting the current clang module or translation unit. 
markClangModuleAsAffecting(Module * M)1463 void markClangModuleAsAffecting(Module *M) { 1464 assert(M->isModuleMapModule()); 1465 if (!BuildingSubmoduleStack.empty()) { 1466 if (M != BuildingSubmoduleStack.back().M) 1467 BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M); 1468 } else { 1469 AffectingClangModules.insert(M); 1470 } 1471 } 1472 1473 /// Get the set of top-level clang modules that affected preprocessing, but were not 1474 /// imported. getAffectingClangModules()1475 const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const { 1476 return AffectingClangModules; 1477 } 1478 1479 /// Mark the file as included. 1480 /// Returns true if this is the first time the file was included. markIncluded(FileEntryRef File)1481 bool markIncluded(FileEntryRef File) { 1482 HeaderInfo.getFileInfo(File); 1483 return IncludedFiles.insert(File).second; 1484 } 1485 1486 /// Return true if this header has already been included. alreadyIncluded(FileEntryRef File)1487 bool alreadyIncluded(FileEntryRef File) const { 1488 HeaderInfo.getFileInfo(File); 1489 return IncludedFiles.count(File); 1490 } 1491 1492 /// Get the set of included files. getIncludedFiles()1493 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } getIncludedFiles()1494 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1495 1496 /// Return the name of the macro defined before \p Loc that has 1497 /// spelling \p Tokens. If there are multiple macros with same spelling, 1498 /// return the last one defined. 1499 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1500 ArrayRef<TokenValue> Tokens) const; 1501 1502 /// Get the predefines for this processor. 1503 /// Used by some third-party tools to inspect and add predefines (see 1504 /// https://github.com/llvm/llvm-project/issues/57483). getPredefines()1505 const std::string &getPredefines() const { return Predefines; } 1506 1507 /// Set the predefines for this Preprocessor. 
1508 /// 1509 /// These predefines are automatically injected when parsing the main file. setPredefines(std::string P)1510 void setPredefines(std::string P) { Predefines = std::move(P); } 1511 1512 /// Return information about the specified preprocessor 1513 /// identifier token. getIdentifierInfo(StringRef Name)1514 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1515 return &Identifiers.get(Name); 1516 } 1517 1518 /// Add the specified pragma handler to this preprocessor. 1519 /// 1520 /// If \p Namespace is non-null, then it is a token required to exist on the 1521 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1522 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)1523 void AddPragmaHandler(PragmaHandler *Handler) { 1524 AddPragmaHandler(StringRef(), Handler); 1525 } 1526 1527 /// Remove the specific pragma handler from this preprocessor. 1528 /// 1529 /// If \p Namespace is non-null, then it should be the namespace that 1530 /// \p Handler was added to. It is an error to remove a handler that 1531 /// has not been registered. 1532 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)1533 void RemovePragmaHandler(PragmaHandler *Handler) { 1534 RemovePragmaHandler(StringRef(), Handler); 1535 } 1536 1537 /// Install empty handlers for all pragmas (making them ignored). 1538 void IgnorePragmas(); 1539 1540 /// Set empty line handler. setEmptylineHandler(EmptylineHandler * Handler)1541 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1542 getEmptylineHandler()1543 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1544 1545 /// Add the specified comment handler to the preprocessor. 1546 void addCommentHandler(CommentHandler *Handler); 1547 1548 /// Remove the specified comment handler. 1549 /// 1550 /// It is an error to remove a handler that has not been registered. 
1551 void removeCommentHandler(CommentHandler *Handler); 1552 1553 /// Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)1554 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1555 CodeComplete = &Handler; 1556 } 1557 1558 /// Retrieve the current code-completion handler. getCodeCompletionHandler()1559 CodeCompletionHandler *getCodeCompletionHandler() const { 1560 return CodeComplete; 1561 } 1562 1563 /// Clear out the code completion handler. clearCodeCompletionHandler()1564 void clearCodeCompletionHandler() { 1565 CodeComplete = nullptr; 1566 } 1567 1568 /// Hook used by the lexer to invoke the "included file" code 1569 /// completion point. 1570 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1571 1572 /// Hook used by the lexer to invoke the "natural language" code 1573 /// completion point. 1574 void CodeCompleteNaturalLanguage(); 1575 1576 /// Set the code completion token for filtering purposes. setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1577 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1578 CodeCompletionII = Filter; 1579 } 1580 1581 /// Set the code completion token range for detecting replacement range later 1582 /// on. setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1583 void setCodeCompletionTokenRange(const SourceLocation Start, 1584 const SourceLocation End) { 1585 CodeCompletionTokenRange = {Start, End}; 1586 } getCodeCompletionTokenRange()1587 SourceRange getCodeCompletionTokenRange() const { 1588 return CodeCompletionTokenRange; 1589 } 1590 1591 /// Get the code completion token for filtering purposes. getCodeCompletionFilter()1592 StringRef getCodeCompletionFilter() { 1593 if (CodeCompletionII) 1594 return CodeCompletionII->getName(); 1595 return {}; 1596 } 1597 1598 /// Retrieve the preprocessing record, or NULL if there is no 1599 /// preprocessing record. 
getPreprocessingRecord()1600 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1601 1602 /// Create a new preprocessing record, which will keep track of 1603 /// all macro expansions, macro definitions, etc. 1604 void createPreprocessingRecord(); 1605 1606 /// Returns true if the FileEntry is the PCH through header. 1607 bool isPCHThroughHeader(const FileEntry *FE); 1608 1609 /// True if creating a PCH with a through header. 1610 bool creatingPCHWithThroughHeader(); 1611 1612 /// True if using a PCH with a through header. 1613 bool usingPCHWithThroughHeader(); 1614 1615 /// True if creating a PCH with a #pragma hdrstop. 1616 bool creatingPCHWithPragmaHdrStop(); 1617 1618 /// True if using a PCH with a #pragma hdrstop. 1619 bool usingPCHWithPragmaHdrStop(); 1620 1621 /// Skip tokens until after the #include of the through header or 1622 /// until after a #pragma hdrstop. 1623 void SkipTokensWhileUsingPCH(); 1624 1625 /// Process directives while skipping until the through header or 1626 /// #pragma hdrstop is found. 1627 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1628 SourceLocation HashLoc); 1629 1630 /// Enter the specified FileID as the main source file, 1631 /// which implicitly adds the builtin defines etc. 1632 void EnterMainSourceFile(); 1633 1634 /// Inform the preprocessor callbacks that processing is complete. 1635 void EndSourceFile(); 1636 1637 /// Add a source file to the top of the include stack and 1638 /// start lexing tokens from it instead of the current buffer. 1639 /// 1640 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1641 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, 1642 SourceLocation Loc, bool IsFirstIncludeOfFile = true); 1643 1644 /// Add a Macro to the top of the include stack and start lexing 1645 /// tokens from it instead of the current buffer. 1646 /// 1647 /// \param Args specifies the tokens input to a function-like macro. 
1648 /// \param ILEnd specifies the location of the ')' for a function-like macro 1649 /// or the identifier for an object-like macro. 1650 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1651 MacroArgs *Args); 1652 1653 private: 1654 /// Add a "macro" context to the top of the include stack, 1655 /// which will cause the lexer to start returning the specified tokens. 1656 /// 1657 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1658 /// will not be subject to further macro expansion. Otherwise, these tokens 1659 /// will be re-macro-expanded when/if expansion is enabled. 1660 /// 1661 /// If \p OwnsTokens is false, this method assumes that the specified stream 1662 /// of tokens has a permanent owner somewhere, so they do not need to be 1663 /// copied. If it is true, it assumes the array of tokens is allocated with 1664 /// \c new[] and the Preprocessor will delete[] it. 1665 /// 1666 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1667 /// set, see the flag documentation for details. 1668 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1669 bool DisableMacroExpansion, bool OwnsTokens, 1670 bool IsReinject); 1671 1672 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1673 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1674 bool DisableMacroExpansion, bool IsReinject) { 1675 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1676 IsReinject); 1677 } 1678 EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1679 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1680 bool IsReinject) { 1681 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1682 IsReinject); 1683 } 1684 1685 /// Pop the current lexer/macro exp off the top of the lexer stack. 
1686 /// 1687 /// This should only be used in situations where the current state of the 1688 /// top-of-stack lexer is known. 1689 void RemoveTopOfLexerStack(); 1690 1691 /// From the point that this method is called, and until 1692 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1693 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1694 /// make the Preprocessor re-lex the same tokens. 1695 /// 1696 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1697 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1698 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1699 /// 1700 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1701 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1702 /// tokens will continue indefinitely. 1703 /// 1704 void EnableBacktrackAtThisPos(); 1705 1706 /// Disable the last EnableBacktrackAtThisPos call. 1707 void CommitBacktrackedTokens(); 1708 1709 /// Make Preprocessor re-lex the tokens that were lexed since 1710 /// EnableBacktrackAtThisPos() was previously called. 1711 void Backtrack(); 1712 1713 /// True if EnableBacktrackAtThisPos() was called and 1714 /// caching of tokens is on. isBacktrackEnabled()1715 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1716 1717 /// Lex the next token for this preprocessor. 1718 void Lex(Token &Result); 1719 1720 /// Lex all tokens for this preprocessor until (and excluding) end of file. 1721 void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr); 1722 1723 /// Lex a token, forming a header-name token if possible. 
1724 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1725 1726 bool LexAfterModuleImport(Token &Result); 1727 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1728 1729 void makeModuleVisible(Module *M, SourceLocation Loc); 1730 getModuleImportLoc(Module * M)1731 SourceLocation getModuleImportLoc(Module *M) const { 1732 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1733 } 1734 1735 /// Lex a string literal, which may be the concatenation of multiple 1736 /// string literals and may even come from macro expansion. 1737 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1738 bool LexStringLiteral(Token &Result, std::string &String, 1739 const char *DiagnosticTag, bool AllowMacroExpansion) { 1740 if (AllowMacroExpansion) 1741 Lex(Result); 1742 else 1743 LexUnexpandedToken(Result); 1744 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1745 AllowMacroExpansion); 1746 } 1747 1748 /// Complete the lexing of a string literal where the first token has 1749 /// already been lexed (see LexStringLiteral). 1750 bool FinishLexStringLiteral(Token &Result, std::string &String, 1751 const char *DiagnosticTag, 1752 bool AllowMacroExpansion); 1753 1754 /// Lex a token. If it's a comment, keep lexing until we get 1755 /// something not a comment. 1756 /// 1757 /// This is useful in -E -C mode where comments would foul up preprocessor 1758 /// directive handling. LexNonComment(Token & Result)1759 void LexNonComment(Token &Result) { 1760 do 1761 Lex(Result); 1762 while (Result.getKind() == tok::comment); 1763 } 1764 1765 /// Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1766 void LexUnexpandedToken(Token &Result) { 1767 // Disable macro expansion. 1768 bool OldVal = DisableMacroExpansion; 1769 DisableMacroExpansion = true; 1770 // Lex the token. 
1771 Lex(Result); 1772 1773 // Reenable it. 1774 DisableMacroExpansion = OldVal; 1775 } 1776 1777 /// Like LexNonComment, but this disables macro expansion of 1778 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1779 void LexUnexpandedNonComment(Token &Result) { 1780 do 1781 LexUnexpandedToken(Result); 1782 while (Result.getKind() == tok::comment); 1783 } 1784 1785 /// Parses a simple integer literal to get its numeric value. Floating 1786 /// point literals and user defined literals are rejected. Used primarily to 1787 /// handle pragmas that accept integer arguments. 1788 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1789 1790 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1791 void SetMacroExpansionOnlyInDirectives() { 1792 DisableMacroExpansion = true; 1793 MacroExpansionInDirectivesOverride = true; 1794 } 1795 1796 /// Peeks ahead N tokens and returns that token without consuming any 1797 /// tokens. 1798 /// 1799 /// LookAhead(0) returns the next token that would be returned by Lex(), 1800 /// LookAhead(1) returns the token after it, etc. This returns normal 1801 /// tokens after phase 5. As such, it is equivalent to using 1802 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1803 const Token &LookAhead(unsigned N) { 1804 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1805 if (CachedLexPos + N < CachedTokens.size()) 1806 return CachedTokens[CachedLexPos+N]; 1807 else 1808 return PeekAhead(N+1); 1809 } 1810 1811 /// When backtracking is enabled and tokens are cached, 1812 /// this allows to revert a specific number of tokens. 1813 /// 1814 /// Note that the number of tokens being reverted should be up to the last 1815 /// backtrack position, not more. 
RevertCachedTokens(unsigned N)1816 void RevertCachedTokens(unsigned N) { 1817 assert(isBacktrackEnabled() && 1818 "Should only be called when tokens are cached for backtracking"); 1819 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1820 && "Should revert tokens up to the last backtrack position, not more"); 1821 assert(signed(CachedLexPos) - signed(N) >= 0 && 1822 "Corrupted backtrack positions ?"); 1823 CachedLexPos -= N; 1824 } 1825 1826 /// Enters a token in the token stream to be lexed next. 1827 /// 1828 /// If BackTrack() is called afterwards, the token will remain at the 1829 /// insertion point. 1830 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1831 /// flag set. See the flag documentation for details. EnterToken(const Token & Tok,bool IsReinject)1832 void EnterToken(const Token &Tok, bool IsReinject) { 1833 if (LexLevel) { 1834 // It's not correct in general to enter caching lex mode while in the 1835 // middle of a nested lexing action. 1836 auto TokCopy = std::make_unique<Token[]>(1); 1837 TokCopy[0] = Tok; 1838 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1839 } else { 1840 EnterCachingLexMode(); 1841 assert(IsReinject && "new tokens in the middle of cached stream"); 1842 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1843 } 1844 } 1845 1846 /// We notify the Preprocessor that if it is caching tokens (because 1847 /// backtrack is enabled) it should replace the most recent cached tokens 1848 /// with the given annotation token. This function has no effect if 1849 /// backtracking is not enabled. 1850 /// 1851 /// Note that the use of this function is just for optimization, so that the 1852 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1853 /// invoked. 
AnnotateCachedTokens(const Token & Tok)1854 void AnnotateCachedTokens(const Token &Tok) { 1855 assert(Tok.isAnnotation() && "Expected annotation token"); 1856 if (CachedLexPos != 0 && isBacktrackEnabled()) 1857 AnnotatePreviousCachedTokens(Tok); 1858 } 1859 1860 /// Get the location of the last cached token, suitable for setting the end 1861 /// location of an annotation token. getLastCachedTokenLocation()1862 SourceLocation getLastCachedTokenLocation() const { 1863 assert(CachedLexPos != 0); 1864 return CachedTokens[CachedLexPos-1].getLastLoc(); 1865 } 1866 1867 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1868 /// CachedTokens. 1869 bool IsPreviousCachedToken(const Token &Tok) const; 1870 1871 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1872 /// in \p NewToks. 1873 /// 1874 /// Useful when a token needs to be split in smaller ones and CachedTokens 1875 /// most recent token must to be updated to reflect that. 1876 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1877 1878 /// Replace the last token with an annotation token. 1879 /// 1880 /// Like AnnotateCachedTokens(), this routine replaces an 1881 /// already-parsed (and resolved) token with an annotation 1882 /// token. However, this routine only replaces the last token with 1883 /// the annotation token; it does not affect any other cached 1884 /// tokens. This function has no effect if backtracking is not 1885 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1886 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1887 assert(Tok.isAnnotation() && "Expected annotation token"); 1888 if (CachedLexPos != 0 && isBacktrackEnabled()) 1889 CachedTokens[CachedLexPos-1] = Tok; 1890 } 1891 1892 /// Enter an annotation token into the token stream. 
1893 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1894 void *AnnotationVal); 1895 1896 /// Determine whether it's possible for a future call to Lex to produce an 1897 /// annotation token created by a previous call to EnterAnnotationToken. mightHavePendingAnnotationTokens()1898 bool mightHavePendingAnnotationTokens() { 1899 return CurLexerCallback != CLK_Lexer; 1900 } 1901 1902 /// Update the current token to represent the provided 1903 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1904 void TypoCorrectToken(const Token &Tok) { 1905 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1906 if (CachedLexPos != 0 && isBacktrackEnabled()) 1907 CachedTokens[CachedLexPos-1] = Tok; 1908 } 1909 1910 /// Recompute the current lexer kind based on the CurLexer/ 1911 /// CurTokenLexer pointers. 1912 void recomputeCurLexerKind(); 1913 1914 /// Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1915 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1916 1917 /// Enables the incremental processing 1918 void enableIncrementalProcessing(bool value = true) { 1919 IncrementalProcessing = value; 1920 } 1921 1922 /// Specify the point at which code-completion will be performed. 1923 /// 1924 /// \param File the file in which code completion should occur. If 1925 /// this file is included multiple times, code-completion will 1926 /// perform completion the first time it is included. If NULL, this 1927 /// function clears out the code-completion point. 1928 /// 1929 /// \param Line the line at which code completion should occur 1930 /// (1-based). 1931 /// 1932 /// \param Column the column at which code completion should occur 1933 /// (1-based). 1934 /// 1935 /// \returns true if an error occurred, false otherwise. 
1936 bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, 1937 unsigned Column); 1938 1939 /// Determine if we are performing code completion. isCodeCompletionEnabled()1940 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1941 1942 /// Returns the location of the code-completion point. 1943 /// 1944 /// Returns an invalid location if code-completion is not enabled or the file 1945 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1946 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1947 1948 /// Returns the start location of the file of code-completion point. 1949 /// 1950 /// Returns an invalid location if code-completion is not enabled or the file 1951 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1952 SourceLocation getCodeCompletionFileLoc() const { 1953 return CodeCompletionFileLoc; 1954 } 1955 1956 /// Returns true if code-completion is enabled and we have hit the 1957 /// code-completion point. isCodeCompletionReached()1958 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1959 1960 /// Note that we hit the code-completion point. setCodeCompletionReached()1961 void setCodeCompletionReached() { 1962 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1963 CodeCompletionReached = true; 1964 // Silence any diagnostics that occur after we hit the code-completion. 1965 getDiagnostics().setSuppressAllDiagnostics(true); 1966 } 1967 1968 /// The location of the currently-active \#pragma clang 1969 /// arc_cf_code_audited begin. 1970 /// 1971 /// Returns an invalid location if there is no such pragma active. 1972 std::pair<IdentifierInfo *, SourceLocation> getPragmaARCCFCodeAuditedInfo()1973 getPragmaARCCFCodeAuditedInfo() const { 1974 return PragmaARCCFCodeAuditedInfo; 1975 } 1976 1977 /// Set the location of the currently-active \#pragma clang 1978 /// arc_cf_code_audited begin. 
An invalid location ends the pragma. setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1979 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 1980 SourceLocation Loc) { 1981 PragmaARCCFCodeAuditedInfo = {Ident, Loc}; 1982 } 1983 1984 /// The location of the currently-active \#pragma clang 1985 /// assume_nonnull begin. 1986 /// 1987 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()1988 SourceLocation getPragmaAssumeNonNullLoc() const { 1989 return PragmaAssumeNonNullLoc; 1990 } 1991 1992 /// Set the location of the currently-active \#pragma clang 1993 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)1994 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1995 PragmaAssumeNonNullLoc = Loc; 1996 } 1997 1998 /// Get the location of the recorded unterminated \#pragma clang 1999 /// assume_nonnull begin in the preamble, if one exists. 2000 /// 2001 /// Returns an invalid location if the premable did not end with 2002 /// such a pragma active or if there is no recorded preamble. getPreambleRecordedPragmaAssumeNonNullLoc()2003 SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const { 2004 return PreambleRecordedPragmaAssumeNonNullLoc; 2005 } 2006 2007 /// Record the location of the unterminated \#pragma clang 2008 /// assume_nonnull begin in the preamble. setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc)2009 void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) { 2010 PreambleRecordedPragmaAssumeNonNullLoc = Loc; 2011 } 2012 2013 /// Set the directory in which the main file should be considered 2014 /// to have been found, if it is not a real file. setMainFileDir(DirectoryEntryRef Dir)2015 void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; } 2016 2017 /// Instruct the preprocessor to skip part of the main source file. 2018 /// 2019 /// \param Bytes The number of bytes in the preamble to skip. 
2020 /// 2021 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 2022 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)2023 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 2024 SkipMainFilePreamble.first = Bytes; 2025 SkipMainFilePreamble.second = StartOfLine; 2026 } 2027 2028 /// Forwarding function for diagnostics. This emits a diagnostic at 2029 /// the specified Token's location, translating the token's start 2030 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)2031 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 2032 return Diags->Report(Loc, DiagID); 2033 } 2034 Diag(const Token & Tok,unsigned DiagID)2035 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 2036 return Diags->Report(Tok.getLocation(), DiagID); 2037 } 2038 2039 /// Return the 'spelling' of the token at the given 2040 /// location; does not go up to the spelling location or down to the 2041 /// expansion location. 2042 /// 2043 /// \param buffer A buffer which will be used only if the token requires 2044 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 2045 /// \param invalid If non-null, will be set \c true if an error occurs. 2046 StringRef getSpelling(SourceLocation loc, 2047 SmallVectorImpl<char> &buffer, 2048 bool *invalid = nullptr) const { 2049 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 2050 } 2051 2052 /// Return the 'spelling' of the Tok token. 2053 /// 2054 /// The spelling of a token is the characters used to represent the token in 2055 /// the source file after trigraph expansion and escaped-newline folding. In 2056 /// particular, this wants to get the true, uncanonicalized, spelling of 2057 /// things like digraphs, UCNs, etc. 2058 /// 2059 /// \param Invalid If non-null, will be set \c true if an error occurs. 
2060 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 2061 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 2062 } 2063 2064 /// Get the spelling of a token into a preallocated buffer, instead 2065 /// of as an std::string. 2066 /// 2067 /// The caller is required to allocate enough space for the token, which is 2068 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 2069 /// actual result is returned. 2070 /// 2071 /// Note that this method may do two possible things: it may either fill in 2072 /// the buffer specified with characters, or it may *change the input pointer* 2073 /// to point to a constant buffer with the data already in it (avoiding a 2074 /// copy). The caller is not allowed to modify the returned buffer pointer 2075 /// if an internal buffer is returned. 2076 unsigned getSpelling(const Token &Tok, const char *&Buffer, 2077 bool *Invalid = nullptr) const { 2078 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 2079 } 2080 2081 /// Get the spelling of a token into a SmallVector. 2082 /// 2083 /// Note that the returned StringRef may not point to the 2084 /// supplied buffer if a copy can be avoided. 2085 StringRef getSpelling(const Token &Tok, 2086 SmallVectorImpl<char> &Buffer, 2087 bool *Invalid = nullptr) const; 2088 2089 /// Relex the token at the specified location. 2090 /// \returns true if there was a failure, false on success. 2091 bool getRawToken(SourceLocation Loc, Token &Result, 2092 bool IgnoreWhiteSpace = false) { 2093 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 2094 } 2095 2096 /// Given a Token \p Tok that is a numeric constant with length 1, 2097 /// return the character. 
2098 char 2099 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 2100 bool *Invalid = nullptr) const { 2101 assert(Tok.is(tok::numeric_constant) && 2102 Tok.getLength() == 1 && "Called on unsupported token"); 2103 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 2104 2105 // If the token is carrying a literal data pointer, just use it. 2106 if (const char *D = Tok.getLiteralData()) 2107 return *D; 2108 2109 // Otherwise, fall back on getCharacterData, which is slower, but always 2110 // works. 2111 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 2112 } 2113 2114 /// Retrieve the name of the immediate macro expansion. 2115 /// 2116 /// This routine starts from a source location, and finds the name of the 2117 /// macro responsible for its immediate expansion. It looks through any 2118 /// intervening macro argument expansions to compute this. It returns a 2119 /// StringRef that refers to the SourceManager-owned buffer of the source 2120 /// where that macro name is spelled. Thus, the result shouldn't out-live 2121 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)2122 StringRef getImmediateMacroName(SourceLocation Loc) { 2123 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 2124 } 2125 2126 /// Plop the specified string into a scratch buffer and set the 2127 /// specified token's location and length to it. 2128 /// 2129 /// If specified, the source location provides a location of the expansion 2130 /// point of the token. 2131 void CreateString(StringRef Str, Token &Tok, 2132 SourceLocation ExpansionLocStart = SourceLocation(), 2133 SourceLocation ExpansionLocEnd = SourceLocation()); 2134 2135 /// Split the first Length characters out of the token starting at TokLoc 2136 /// and return a location pointing to the split token. Re-lexing from the 2137 /// split token will return the split token rather than the original. 
2138 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 2139 2140 /// Computes the source location just past the end of the 2141 /// token at this source location. 2142 /// 2143 /// This routine can be used to produce a source location that 2144 /// points just past the end of the token referenced by \p Loc, and 2145 /// is generally used when a diagnostic needs to point just after a 2146 /// token where it expected something different that it received. If 2147 /// the returned source location would not be meaningful (e.g., if 2148 /// it points into a macro), this routine returns an invalid 2149 /// source location. 2150 /// 2151 /// \param Offset an offset from the end of the token, where the source 2152 /// location should refer to. The default offset (0) produces a source 2153 /// location pointing just past the end of the token; an offset of 1 produces 2154 /// a source location pointing to the last character in the token, etc. 2155 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 2156 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 2157 } 2158 2159 /// Returns true if the given MacroID location points at the first 2160 /// token of the macro expansion. 2161 /// 2162 /// \param MacroBegin If non-null and function returns true, it is set to 2163 /// begin location of the macro. 2164 bool isAtStartOfMacroExpansion(SourceLocation loc, 2165 SourceLocation *MacroBegin = nullptr) const { 2166 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 2167 MacroBegin); 2168 } 2169 2170 /// Returns true if the given MacroID location points at the last 2171 /// token of the macro expansion. 2172 /// 2173 /// \param MacroEnd If non-null and function returns true, it is set to 2174 /// end location of the macro. 
2175 bool isAtEndOfMacroExpansion(SourceLocation loc, 2176 SourceLocation *MacroEnd = nullptr) const { 2177 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 2178 } 2179 2180 /// Print the token to stderr, used for debugging. 2181 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 2182 void DumpLocation(SourceLocation Loc) const; 2183 void DumpMacro(const MacroInfo &MI) const; 2184 void dumpMacroInfo(const IdentifierInfo *II); 2185 2186 /// Given a location that specifies the start of a 2187 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)2188 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 2189 unsigned Char) const { 2190 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 2191 } 2192 2193 /// Increment the counters for the number of token paste operations 2194 /// performed. 2195 /// 2196 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)2197 void IncrementPasteCounter(bool isFast) { 2198 if (isFast) 2199 ++NumFastTokenPaste; 2200 else 2201 ++NumTokenPaste; 2202 } 2203 2204 void PrintStats(); 2205 2206 size_t getTotalMemory() const; 2207 2208 /// When the macro expander pastes together a comment (/##/) in Microsoft 2209 /// mode, this method handles updating the current state, returning the 2210 /// token on the next source line. 2211 void HandleMicrosoftCommentPaste(Token &Tok); 2212 2213 //===--------------------------------------------------------------------===// 2214 // Preprocessor callback methods. These are invoked by a lexer as various 2215 // directives and events are found. 2216 2217 /// Given a tok::raw_identifier token, look up the 2218 /// identifier information for the token and install it into the token, 2219 /// updating the token kind accordingly. 
2220 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2221 2222 private: 2223 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2224 2225 public: 2226 /// Specifies the reason for poisoning an identifier. 2227 /// 2228 /// If that identifier is accessed while poisoned, then this reason will be 2229 /// used instead of the default "poisoned" diagnostic. 2230 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2231 2232 /// Display reason for poisoned identifier. 2233 void HandlePoisonedIdentifier(Token & Identifier); 2234 MaybeHandlePoisonedIdentifier(Token & Identifier)2235 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2236 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2237 if(II->isPoisoned()) { 2238 HandlePoisonedIdentifier(Identifier); 2239 } 2240 } 2241 } 2242 2243 private: 2244 /// Identifiers used for SEH handling in Borland. These are only 2245 /// allowed in particular circumstances 2246 // __except block 2247 IdentifierInfo *Ident__exception_code, 2248 *Ident___exception_code, 2249 *Ident_GetExceptionCode; 2250 // __except filter expression 2251 IdentifierInfo *Ident__exception_info, 2252 *Ident___exception_info, 2253 *Ident_GetExceptionInfo; 2254 // __finally 2255 IdentifierInfo *Ident__abnormal_termination, 2256 *Ident___abnormal_termination, 2257 *Ident_AbnormalTermination; 2258 2259 const char *getCurLexerEndPos(); 2260 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 2261 2262 public: 2263 void PoisonSEHIdentifiers(bool Poison = true); // Borland 2264 2265 /// Callback invoked when the lexer reads an identifier and has 2266 /// filled in the tokens IdentifierInfo member. 2267 /// 2268 /// This callback potentially macro expands it or turns it into a named 2269 /// token (like 'for'). 2270 /// 2271 /// \returns true if we actually computed a token, false if we need to 2272 /// lex again. 
2273 bool HandleIdentifier(Token &Identifier); 2274 2275 /// Callback invoked when the lexer hits the end of the current file. 2276 /// 2277 /// This either returns the EOF token and returns true, or 2278 /// pops a level off the include stack and returns false, at which point the 2279 /// client should call lex again. 2280 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 2281 2282 /// Callback invoked when the current TokenLexer hits the end of its 2283 /// token stream. 2284 bool HandleEndOfTokenLexer(Token &Result); 2285 2286 /// Callback invoked when the lexer sees a # token at the start of a 2287 /// line. 2288 /// 2289 /// This consumes the directive, modifies the lexer/preprocessor state, and 2290 /// advances the lexer(s) so that the next token read is the correct one. 2291 void HandleDirective(Token &Result); 2292 2293 /// Ensure that the next token is a tok::eod token. 2294 /// 2295 /// If not, emit a diagnostic and consume up until the eod. 2296 /// If \p EnableMacros is true, then we consider macros that expand to zero 2297 /// tokens as being ok. 2298 /// 2299 /// \return The location of the end of the directive (the terminating 2300 /// newline). 2301 SourceLocation CheckEndOfDirective(const char *DirType, 2302 bool EnableMacros = false); 2303 2304 /// Read and discard all tokens remaining on the current line until 2305 /// the tok::eod token is found. Returns the range of the skipped tokens. 2306 SourceRange DiscardUntilEndOfDirective(); 2307 2308 /// Returns true if the preprocessor has seen a use of 2309 /// __DATE__ or __TIME__ in the file so far. 
SawDateOrTime()2310 bool SawDateOrTime() const { 2311 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2312 } getCounterValue()2313 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)2314 void setCounterValue(unsigned V) { CounterValue = V; } 2315 getCurrentFPEvalMethod()2316 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2317 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2318 "FPEvalMethod should be set either from command line or from the " 2319 "target info"); 2320 return CurrentFPEvalMethod; 2321 } 2322 getTUFPEvalMethod()2323 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2324 return TUFPEvalMethod; 2325 } 2326 getLastFPEvalPragmaLocation()2327 SourceLocation getLastFPEvalPragmaLocation() const { 2328 return LastFPEvalPragmaLocation; 2329 } 2330 setCurrentFPEvalMethod(SourceLocation PragmaLoc,LangOptions::FPEvalMethodKind Val)2331 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2332 LangOptions::FPEvalMethodKind Val) { 2333 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2334 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2335 // This is the location of the '#pragma float_control" where the 2336 // execution state is modifed. 2337 LastFPEvalPragmaLocation = PragmaLoc; 2338 CurrentFPEvalMethod = Val; 2339 TUFPEvalMethod = Val; 2340 } 2341 setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)2342 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2343 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2344 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2345 TUFPEvalMethod = Val; 2346 } 2347 2348 /// Retrieves the module that we're currently building, if any. 2349 Module *getCurrentModule(); 2350 2351 /// Retrieves the module whose implementation we're current compiling, if any. 2352 Module *getCurrentModuleImplementation(); 2353 2354 /// If we are preprocessing a named module. 
isInNamedModule()2355 bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); } 2356 2357 /// If we are proprocessing a named interface unit. 2358 /// Note that a module implementation partition is not considered as an 2359 /// named interface unit here although it is importable 2360 /// to ease the parsing. isInNamedInterfaceUnit()2361 bool isInNamedInterfaceUnit() const { 2362 return ModuleDeclState.isNamedInterface(); 2363 } 2364 2365 /// Get the named module name we're preprocessing. 2366 /// Requires we're preprocessing a named module. getNamedModuleName()2367 StringRef getNamedModuleName() const { return ModuleDeclState.getName(); } 2368 2369 /// If we are implementing an implementation module unit. 2370 /// Note that the module implementation partition is not considered as an 2371 /// implementation unit. isInImplementationUnit()2372 bool isInImplementationUnit() const { 2373 return ModuleDeclState.isImplementationUnit(); 2374 } 2375 2376 /// If we're importing a standard C++20 Named Modules. isInImportingCXXNamedModules()2377 bool isInImportingCXXNamedModules() const { 2378 // NamedModuleImportPath will be non-empty only if we're importing 2379 // Standard C++ named modules. 2380 return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && 2381 !IsAtImport; 2382 } 2383 2384 /// Allocate a new MacroInfo object with the provided SourceLocation. 2385 MacroInfo *AllocateMacroInfo(SourceLocation L); 2386 2387 /// Turn the specified lexer token into a fully checked and spelled 2388 /// filename, e.g. as an operand of \#include. 2389 /// 2390 /// The caller is expected to provide a buffer that is large enough to hold 2391 /// the spelling of the filename, but is also expected to handle the case 2392 /// when this method decides to use a different buffer. 2393 /// 2394 /// \returns true if the input filename was in <>'s or false if it was 2395 /// in ""'s. 
2396 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2397 2398 /// Given a "foo" or \<foo> reference, look up the indicated file. 2399 /// 2400 /// Returns std::nullopt on failure. \p isAngled indicates whether the file 2401 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2402 OptionalFileEntryRef 2403 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2404 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2405 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2406 SmallVectorImpl<char> *RelativePath, 2407 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2408 bool *IsFrameworkFound, bool SkipCache = false, 2409 bool OpenFile = true, bool CacheFailures = true); 2410 2411 /// Return true if we're in the top-level file, not in a \#include. 2412 bool isInPrimaryFile() const; 2413 2414 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2415 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
2416 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2417 2418 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2419 bool *ShadowFlag = nullptr); 2420 2421 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2422 Module *LeaveSubmodule(bool ForPragma); 2423 2424 private: 2425 friend void TokenLexer::ExpandFunctionArguments(); 2426 PushIncludeMacroStack()2427 void PushIncludeMacroStack() { 2428 assert(CurLexerCallback != CLK_CachingLexer && 2429 "cannot push a caching lexer"); 2430 IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, 2431 std::move(CurLexer), CurPPLexer, 2432 std::move(CurTokenLexer), CurDirLookup); 2433 CurPPLexer = nullptr; 2434 } 2435 PopIncludeMacroStack()2436 void PopIncludeMacroStack() { 2437 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2438 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2439 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2440 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2441 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2442 CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; 2443 IncludeMacroStack.pop_back(); 2444 } 2445 2446 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2447 2448 /// Determine whether we need to create module macros for #defines in the 2449 /// current context. 2450 bool needModuleMacros() const; 2451 2452 /// Update the set of active module macros and ambiguity flag for a module 2453 /// macro name. 2454 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2455 2456 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2457 SourceLocation Loc); 2458 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2459 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2460 bool isPublic); 2461 2462 /// Lex and validate a macro name, which occurs after a 2463 /// \#define or \#undef. 
2464 /// 2465 /// \param MacroNameTok Token that represents the name defined or undefined. 2466 /// \param IsDefineUndef Kind if preprocessor directive. 2467 /// \param ShadowFlag Points to flag that is set if macro name shadows 2468 /// a keyword. 2469 /// 2470 /// This emits a diagnostic, sets the token kind to eod, 2471 /// and discards the rest of the macro line if the macro name is invalid. 2472 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2473 bool *ShadowFlag = nullptr); 2474 2475 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2476 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2477 /// doing so performs certain validity checks including (but not limited to): 2478 /// - # (stringization) is followed by a macro parameter 2479 /// \param MacroNameTok - Token that represents the macro name 2480 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2481 /// 2482 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2483 /// returns a nullptr if an invalid sequence of tokens is encountered. 2484 MacroInfo *ReadOptionalMacroParameterListAndBody( 2485 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2486 2487 /// The ( starting an argument list of a macro definition has just been read. 2488 /// Lex the rest of the parameters and the closing ), updating \p MI with 2489 /// what we learn and saving in \p LastTok the last token read. 2490 /// Return true if an error occurs parsing the arg list. 2491 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2492 2493 /// Provide a suggestion for a typoed directive. If there is no typo, then 2494 /// just skip suggesting. 
2495 /// 2496 /// \param Tok - Token that represents the directive 2497 /// \param Directive - String reference for the directive name 2498 void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const; 2499 2500 /// We just read a \#if or related directive and decided that the 2501 /// subsequent tokens are in the \#if'd out portion of the 2502 /// file. Lex the rest of the file, until we see an \#endif. If \p 2503 /// FoundNonSkipPortion is true, then we have already emitted code for part of 2504 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 2505 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 2506 /// already seen one so a \#else directive is a duplicate. When this returns, 2507 /// the caller can lex the first valid token. 2508 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, 2509 SourceLocation IfTokenLoc, 2510 bool FoundNonSkipPortion, bool FoundElse, 2511 SourceLocation ElseLoc = SourceLocation()); 2512 2513 /// Information about the result for evaluating an expression for a 2514 /// preprocessor directive. 2515 struct DirectiveEvalResult { 2516 /// Whether the expression was evaluated as true or not. 2517 bool Conditional; 2518 2519 /// True if the expression contained identifiers that were undefined. 2520 bool IncludedUndefinedIds; 2521 2522 /// The source range for the expression. 2523 SourceRange ExprRange; 2524 }; 2525 2526 /// Evaluate an integer constant expression that may occur after a 2527 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2528 /// 2529 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2530 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 2531 2532 /// Process a '__has_include("path")' expression. 2533 /// 2534 /// Returns true if successful. 2535 bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II); 2536 2537 /// Process '__has_include_next("path")' expression. 
2538 /// 2539 /// Returns true if successful. 2540 bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II); 2541 2542 /// Get the directory and file from which to start \#include_next lookup. 2543 std::pair<ConstSearchDirIterator, const FileEntry *> 2544 getIncludeNextStart(const Token &IncludeNextTok) const; 2545 2546 /// Install the standard preprocessor pragmas: 2547 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2548 void RegisterBuiltinPragmas(); 2549 2550 /// Register builtin macros such as __LINE__ with the identifier table. 2551 void RegisterBuiltinMacros(); 2552 2553 /// If an identifier token is read that is to be expanded as a macro, handle 2554 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2555 /// otherwise the caller should lex again. 2556 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2557 2558 /// Cache macro expanded tokens for TokenLexers. 2559 // 2560 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2561 /// going to lex in the cache and when it finishes the tokens are removed 2562 /// from the end of the cache. 2563 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2564 ArrayRef<Token> tokens); 2565 2566 void removeCachedMacroExpandedTokensOfLastLexer(); 2567 2568 /// Determine whether the next preprocessor token to be 2569 /// lexed is a '('. If so, consume the token and return true, if not, this 2570 /// method should have no observable side-effect on the lexed tokens. 2571 bool isNextPPTokenLParen(); 2572 2573 /// After reading "MACRO(", this method is invoked to read all of the formal 2574 /// arguments specified for the macro invocation. Returns null on error. 2575 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2576 SourceLocation &MacroEnd); 2577 2578 /// If an identifier token is read that is to be expanded 2579 /// as a builtin macro, handle it and return the next token as 'Tok'. 
2580 void ExpandBuiltinMacro(Token &Tok); 2581 2582 /// Read a \c _Pragma directive, slice it up, process it, then 2583 /// return the first token after the directive. 2584 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2585 void Handle_Pragma(Token &Tok); 2586 2587 /// Like Handle_Pragma except the pragma text is not enclosed within 2588 /// a string literal. 2589 void HandleMicrosoft__pragma(Token &Tok); 2590 2591 /// Add a lexer to the top of the include stack and 2592 /// start lexing tokens from it instead of the current buffer. 2593 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2594 2595 /// Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)2596 void setPredefinesFileID(FileID FID) { 2597 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2598 PredefinesFileID = FID; 2599 } 2600 2601 /// Set the FileID for the PCH through header. 2602 void setPCHThroughHeaderFileID(FileID FID); 2603 2604 /// Returns true if we are lexing from a file and not a 2605 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2606 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2607 return L ? !L->isPragmaLexer() : P != nullptr; 2608 } 2609 IsFileLexer(const IncludeStackInfo & I)2610 static bool IsFileLexer(const IncludeStackInfo& I) { 2611 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2612 } 2613 IsFileLexer()2614 bool IsFileLexer() const { 2615 return IsFileLexer(CurLexer.get(), CurPPLexer); 2616 } 2617 2618 //===--------------------------------------------------------------------===// 2619 // Caching stuff. 2620 void CachingLex(Token &Result); 2621 InCachingLexMode()2622 bool InCachingLexMode() const { 2623 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2624 // that we are past EOF, not that we are in CachingLex mode. 
2625 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2626 } 2627 2628 void EnterCachingLexMode(); 2629 void EnterCachingLexModeUnchecked(); 2630 ExitCachingLexMode()2631 void ExitCachingLexMode() { 2632 if (InCachingLexMode()) 2633 RemoveTopOfLexerStack(); 2634 } 2635 2636 const Token &PeekAhead(unsigned N); 2637 void AnnotatePreviousCachedTokens(const Token &Tok); 2638 2639 //===--------------------------------------------------------------------===// 2640 /// Handle*Directive - implement the various preprocessor directives. These 2641 /// should side-effect the current preprocessor object so that the next call 2642 /// to Lex() will return the appropriate token next. 2643 void HandleLineDirective(); 2644 void HandleDigitDirective(Token &Tok); 2645 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2646 void HandleIdentSCCSDirective(Token &Tok); 2647 void HandleMacroPublicDirective(Token &Tok); 2648 void HandleMacroPrivateDirective(); 2649 2650 /// An additional notification that can be produced by a header inclusion or 2651 /// import to tell the parser what happened. 
2652 struct ImportAction { 2653 enum ActionKind { 2654 None, 2655 ModuleBegin, 2656 ModuleImport, 2657 HeaderUnitImport, 2658 SkippedModuleImport, 2659 Failure, 2660 } Kind; 2661 Module *ModuleForHeader = nullptr; 2662 2663 ImportAction(ActionKind AK, Module *Mod = nullptr) KindImportAction2664 : Kind(AK), ModuleForHeader(Mod) { 2665 assert((AK == None || Mod || AK == Failure) && 2666 "no module for module action"); 2667 } 2668 }; 2669 2670 OptionalFileEntryRef LookupHeaderIncludeOrImport( 2671 ConstSearchDirIterator *CurDir, StringRef &Filename, 2672 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2673 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2674 bool &IsMapped, ConstSearchDirIterator LookupFrom, 2675 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2676 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2677 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2678 2679 // File inclusion. 2680 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2681 ConstSearchDirIterator LookupFrom = nullptr, 2682 const FileEntry *LookupFromFile = nullptr); 2683 ImportAction 2684 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2685 Token &FilenameTok, SourceLocation EndLoc, 2686 ConstSearchDirIterator LookupFrom = nullptr, 2687 const FileEntry *LookupFromFile = nullptr); 2688 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2689 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2690 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2691 void HandleMicrosoftImportDirective(Token &Tok); 2692 2693 public: 2694 /// Check that the given module is available, producing a diagnostic if not. 2695 /// \return \c true if the check failed (because the module is not available). 2696 /// \c false if the module appears to be usable. 
2697 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 2698 const TargetInfo &TargetInfo, 2699 const Module &M, DiagnosticsEngine &Diags); 2700 2701 // Module inclusion testing. 2702 /// Find the module that owns the source or header file that 2703 /// \p Loc points to. If the location is in a file that was included 2704 /// into a module, or is outside any module, returns nullptr. 2705 Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual); 2706 2707 /// We want to produce a diagnostic at location IncLoc concerning an 2708 /// unreachable effect at location MLoc (eg, where a desired entity was 2709 /// declared or defined). Determine whether the right way to make MLoc 2710 /// reachable is by #include, and if so, what header should be included. 2711 /// 2712 /// This is not necessarily fast, and might load unexpected module maps, so 2713 /// should only be called by code that intends to produce an error. 2714 /// 2715 /// \param IncLoc The location at which the missing effect was detected. 2716 /// \param MLoc A location within an unimported module at which the desired 2717 /// effect occurred. 2718 /// \return A file that can be #included to provide the desired effect. Null 2719 /// if no such file could be determined or if a #include is not 2720 /// appropriate (eg, if a module should be imported instead). 
2721 OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 2722 SourceLocation MLoc); 2723 isRecordingPreamble()2724 bool isRecordingPreamble() const { 2725 return PreambleConditionalStack.isRecording(); 2726 } 2727 hasRecordedPreamble()2728 bool hasRecordedPreamble() const { 2729 return PreambleConditionalStack.hasRecordedPreamble(); 2730 } 2731 getPreambleConditionalStack()2732 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 2733 return PreambleConditionalStack.getStack(); 2734 } 2735 setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2736 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2737 PreambleConditionalStack.setStack(s); 2738 } 2739 setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,std::optional<PreambleSkipInfo> SkipInfo)2740 void setReplayablePreambleConditionalStack( 2741 ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) { 2742 PreambleConditionalStack.startReplaying(); 2743 PreambleConditionalStack.setStack(s); 2744 PreambleConditionalStack.SkipInfo = SkipInfo; 2745 } 2746 getPreambleSkipInfo()2747 std::optional<PreambleSkipInfo> getPreambleSkipInfo() const { 2748 return PreambleConditionalStack.SkipInfo; 2749 } 2750 2751 private: 2752 /// After processing predefined file, initialize the conditional stack from 2753 /// the preamble. 2754 void replayPreambleConditionalStack(); 2755 2756 // Macro handling. 2757 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard); 2758 void HandleUndefDirective(); 2759 2760 // Conditional Inclusion. 
2761 void HandleIfdefDirective(Token &Result, const Token &HashToken, 2762 bool isIfndef, bool ReadAnyTokensBeforeDirective); 2763 void HandleIfDirective(Token &IfToken, const Token &HashToken, 2764 bool ReadAnyTokensBeforeDirective); 2765 void HandleEndifDirective(Token &EndifToken); 2766 void HandleElseDirective(Token &Result, const Token &HashToken); 2767 void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken, 2768 tok::PPKeywordKind Kind); 2769 2770 // Pragmas. 2771 void HandlePragmaDirective(PragmaIntroducer Introducer); 2772 2773 public: 2774 void HandlePragmaOnce(Token &OnceTok); 2775 void HandlePragmaMark(Token &MarkTok); 2776 void HandlePragmaPoison(); 2777 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2778 void HandlePragmaDependency(Token &DependencyTok); 2779 void HandlePragmaPushMacro(Token &Tok); 2780 void HandlePragmaPopMacro(Token &Tok); 2781 void HandlePragmaIncludeAlias(Token &Tok); 2782 void HandlePragmaModuleBuild(Token &Tok); 2783 void HandlePragmaHdrstop(Token &Tok); 2784 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2785 2786 // Return true and store the first token only if any CommentHandler 2787 // has inserted some tokens and getCommentRetentionState() is false. 2788 bool HandleComment(Token &result, SourceRange Comment); 2789 2790 /// A macro is used, update information about macros that need unused 2791 /// warnings. 
2792 void markMacroAsUsed(MacroInfo *MI); 2793 addMacroDeprecationMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2794 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2795 SourceLocation AnnotationLoc) { 2796 auto Annotations = AnnotationInfos.find(II); 2797 if (Annotations == AnnotationInfos.end()) 2798 AnnotationInfos.insert(std::make_pair( 2799 II, 2800 MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg)))); 2801 else 2802 Annotations->second.DeprecationInfo = 2803 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2804 } 2805 addRestrictExpansionMsg(const IdentifierInfo * II,std::string Msg,SourceLocation AnnotationLoc)2806 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2807 SourceLocation AnnotationLoc) { 2808 auto Annotations = AnnotationInfos.find(II); 2809 if (Annotations == AnnotationInfos.end()) 2810 AnnotationInfos.insert( 2811 std::make_pair(II, MacroAnnotations::makeRestrictExpansion( 2812 AnnotationLoc, std::move(Msg)))); 2813 else 2814 Annotations->second.RestrictExpansionInfo = 2815 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2816 } 2817 addFinalLoc(const IdentifierInfo * II,SourceLocation AnnotationLoc)2818 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2819 auto Annotations = AnnotationInfos.find(II); 2820 if (Annotations == AnnotationInfos.end()) 2821 AnnotationInfos.insert( 2822 std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc))); 2823 else 2824 Annotations->second.FinalAnnotationLoc = AnnotationLoc; 2825 } 2826 getMacroAnnotations(const IdentifierInfo * II)2827 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2828 return AnnotationInfos.find(II)->second; 2829 } 2830 2831 void emitMacroExpansionWarnings(const Token &Identifier, 2832 bool IsIfnDef = false) const { 2833 IdentifierInfo *Info = Identifier.getIdentifierInfo(); 2834 if (Info->isDeprecatedMacro()) 2835 
emitMacroDeprecationWarning(Identifier); 2836 2837 if (Info->isRestrictExpansion() && 2838 !SourceMgr.isInMainFile(Identifier.getLocation())) 2839 emitRestrictExpansionWarning(Identifier); 2840 2841 if (!IsIfnDef) { 2842 if (Info->getName() == "INFINITY" && getLangOpts().NoHonorInfs) 2843 emitRestrictInfNaNWarning(Identifier, 0); 2844 if (Info->getName() == "NAN" && getLangOpts().NoHonorNaNs) 2845 emitRestrictInfNaNWarning(Identifier, 1); 2846 } 2847 } 2848 2849 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2850 const LangOptions &LangOpts, 2851 const TargetInfo &TI); 2852 2853 static void processPathToFileName(SmallVectorImpl<char> &FileName, 2854 const PresumedLoc &PLoc, 2855 const LangOptions &LangOpts, 2856 const TargetInfo &TI); 2857 2858 private: 2859 void emitMacroDeprecationWarning(const Token &Identifier) const; 2860 void emitRestrictExpansionWarning(const Token &Identifier) const; 2861 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 2862 void emitRestrictInfNaNWarning(const Token &Identifier, 2863 unsigned DiagSelection) const; 2864 2865 /// This boolean state keeps track if the current scanned token (by this PP) 2866 /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a 2867 /// translation unit in a linear order. 2868 bool InSafeBufferOptOutRegion = false; 2869 2870 /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out 2871 /// region if PP is currently in such a region. Hold undefined value 2872 /// otherwise. 2873 SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. 2874 2875 // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one 2876 // translation unit. Each region is represented by a pair of start and end 2877 // locations. A region is "open" if its' start and end locations are 2878 // identical. 
2879 SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap; 2880 2881 public: 2882 /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out 2883 /// region. This `Loc` must be a source location that has been pre-processed. 2884 bool isSafeBufferOptOut(const SourceManager&SourceMgr, const SourceLocation &Loc) const; 2885 2886 /// Alter the state of whether this PP currently is in a 2887 /// "-Wunsafe-buffer-usage" opt-out region. 2888 /// 2889 /// \param isEnter true if this PP is entering a region; otherwise, this PP 2890 /// is exiting a region 2891 /// \param Loc the location of the entry or exit of a 2892 /// region 2893 /// \return true iff it is INVALID to enter or exit a region, i.e., 2894 /// attempt to enter a region before exiting a previous region, or exiting a 2895 /// region that PP is not currently in. 2896 bool enterOrExitSafeBufferOptOutRegion(bool isEnter, 2897 const SourceLocation &Loc); 2898 2899 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 2900 /// opt-out region 2901 bool isPPInSafeBufferOptOutRegion(); 2902 2903 /// \param StartLoc output argument. It will be set to the start location of 2904 /// the current "-Wunsafe-buffer-usage" opt-out region iff this function 2905 /// returns true. 2906 /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" 2907 /// opt-out region 2908 bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); 2909 2910 private: 2911 /// Helper functions to forward lexing to the actual lexer. They all share the 2912 /// same signature. 
CLK_Lexer(Preprocessor & P,Token & Result)2913 static bool CLK_Lexer(Preprocessor &P, Token &Result) { 2914 return P.CurLexer->Lex(Result); 2915 } CLK_TokenLexer(Preprocessor & P,Token & Result)2916 static bool CLK_TokenLexer(Preprocessor &P, Token &Result) { 2917 return P.CurTokenLexer->Lex(Result); 2918 } CLK_CachingLexer(Preprocessor & P,Token & Result)2919 static bool CLK_CachingLexer(Preprocessor &P, Token &Result) { 2920 P.CachingLex(Result); 2921 return true; 2922 } CLK_DependencyDirectivesLexer(Preprocessor & P,Token & Result)2923 static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { 2924 return P.CurLexer->LexDependencyDirectiveToken(Result); 2925 } CLK_LexAfterModuleImport(Preprocessor & P,Token & Result)2926 static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { 2927 return P.LexAfterModuleImport(Result); 2928 } 2929 }; 2930 2931 /// Abstract base class that describes a handler that will receive 2932 /// source ranges for each of the comments encountered in the source file. 2933 class CommentHandler { 2934 public: 2935 virtual ~CommentHandler(); 2936 2937 // The handler shall return true if it has pushed any tokens 2938 // to be read using e.g. EnterToken or EnterTokenStream. 2939 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2940 }; 2941 2942 /// Abstract base class that describes a handler that will receive 2943 /// source ranges for empty lines encountered in the source file. 2944 class EmptylineHandler { 2945 public: 2946 virtual ~EmptylineHandler(); 2947 2948 // The handler handles empty lines. 2949 virtual void HandleEmptyline(SourceRange Range) = 0; 2950 }; 2951 2952 /// Registry of pragma handlers added by plugins 2953 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 2954 2955 } // namespace clang 2956 2957 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 2958