1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/DiagnosticIDs.h" 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/Module.h" 23 #include "clang/Basic/SourceLocation.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Basic/TokenKinds.h" 26 #include "clang/Lex/HeaderSearch.h" 27 #include "clang/Lex/Lexer.h" 28 #include "clang/Lex/MacroInfo.h" 29 #include "clang/Lex/ModuleLoader.h" 30 #include "clang/Lex/ModuleMap.h" 31 #include "clang/Lex/PPCallbacks.h" 32 #include "clang/Lex/Token.h" 33 #include "clang/Lex/TokenLexer.h" 34 #include "llvm/ADT/ArrayRef.h" 35 #include "llvm/ADT/DenseMap.h" 36 #include "llvm/ADT/FoldingSet.h" 37 #include "llvm/ADT/FunctionExtras.h" 38 #include "llvm/ADT/None.h" 39 #include "llvm/ADT/Optional.h" 40 #include "llvm/ADT/PointerUnion.h" 41 #include "llvm/ADT/STLExtras.h" 42 #include "llvm/ADT/SmallPtrSet.h" 43 #include "llvm/ADT/SmallVector.h" 44 #include "llvm/ADT/StringRef.h" 45 #include "llvm/ADT/TinyPtrVector.h" 46 #include "llvm/ADT/iterator_range.h" 47 #include "llvm/Support/Allocator.h" 48 #include "llvm/Support/Casting.h" 49 #include "llvm/Support/Registry.h" 50 #include <cassert> 51 #include <cstddef> 52 #include <cstdint> 53 #include <map> 54 #include <memory> 55 #include <string> 56 #include 
<utility> 57 #include <vector> 58 59 namespace llvm { 60 61 template<unsigned InternalLen> class SmallString; 62 63 } // namespace llvm 64 65 namespace clang { 66 67 class CodeCompletionHandler; 68 class CommentHandler; 69 class DirectoryEntry; 70 class EmptylineHandler; 71 class ExternalPreprocessorSource; 72 class FileEntry; 73 class FileManager; 74 class HeaderSearch; 75 class MacroArgs; 76 class PragmaHandler; 77 class PragmaNamespace; 78 class PreprocessingRecord; 79 class PreprocessorLexer; 80 class PreprocessorOptions; 81 class ScratchBuffer; 82 class TargetInfo; 83 84 namespace Builtin { 85 class Context; 86 } 87 88 /// Stores token information for comparing actual tokens with 89 /// predefined values. Only handles simple tokens and identifiers. 90 class TokenValue { 91 tok::TokenKind Kind; 92 IdentifierInfo *II; 93 94 public: 95 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 96 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 97 assert(Kind != tok::identifier && 98 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 99 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 100 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 101 } 102 103 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 104 105 bool operator==(const Token &Tok) const { 106 return Tok.getKind() == Kind && 107 (!II || II == Tok.getIdentifierInfo()); 108 } 109 }; 110 111 /// Context in which macro name is used. 112 enum MacroUse { 113 // other than #define or #undef 114 MU_Other = 0, 115 116 // macro name specified in #define 117 MU_Define = 1, 118 119 // macro name specified in #undef 120 MU_Undef = 2 121 }; 122 123 /// Engages in a tight little dance with the lexer to efficiently 124 /// preprocess tokens. 
125 /// 126 /// Lexers know only about tokens within a single source file, and don't 127 /// know anything about preprocessor-level issues like the \#include stack, 128 /// token expansion, etc. 129 class Preprocessor { 130 friend class VAOptDefinitionContext; 131 friend class VariadicMacroScopeGuard; 132 133 llvm::unique_function<void(const clang::Token &)> OnToken; 134 std::shared_ptr<PreprocessorOptions> PPOpts; 135 DiagnosticsEngine *Diags; 136 LangOptions &LangOpts; 137 const TargetInfo *Target = nullptr; 138 const TargetInfo *AuxTarget = nullptr; 139 FileManager &FileMgr; 140 SourceManager &SourceMgr; 141 std::unique_ptr<ScratchBuffer> ScratchBuf; 142 HeaderSearch &HeaderInfo; 143 ModuleLoader &TheModuleLoader; 144 145 /// External source of macros. 146 ExternalPreprocessorSource *ExternalSource; 147 148 /// A BumpPtrAllocator object used to quickly allocate and release 149 /// objects internal to the Preprocessor. 150 llvm::BumpPtrAllocator BP; 151 152 /// Identifiers for builtin macros and other builtins. 
153 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 154 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 155 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 156 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 157 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 158 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 159 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 160 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 161 IdentifierInfo *Ident__identifier; // __identifier 162 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 163 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 164 IdentifierInfo *Ident__has_feature; // __has_feature 165 IdentifierInfo *Ident__has_extension; // __has_extension 166 IdentifierInfo *Ident__has_builtin; // __has_builtin 167 IdentifierInfo *Ident__has_attribute; // __has_attribute 168 IdentifierInfo *Ident__has_include; // __has_include 169 IdentifierInfo *Ident__has_include_next; // __has_include_next 170 IdentifierInfo *Ident__has_warning; // __has_warning 171 IdentifierInfo *Ident__is_identifier; // __is_identifier 172 IdentifierInfo *Ident__building_module; // __building_module 173 IdentifierInfo *Ident__MODULE__; // __MODULE__ 174 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 175 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 176 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 177 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 178 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 179 IdentifierInfo *Ident__is_target_os; // __is_target_os 180 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 181 IdentifierInfo *Ident__is_target_variant_os; 182 IdentifierInfo *Ident__is_target_variant_environment; 183 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 184 185 // Weak, only valid (and set) while InMacroArgs is true. 
186 Token* ArgMacro; 187 188 SourceLocation DATELoc, TIMELoc; 189 190 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 191 // not specified on the command line. The target is queried to set the 192 // default evaluation method. 193 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 194 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 195 196 // Keeps the value of the last evaluation method before a 197 // `pragma float_control (precise,off) is applied. 198 LangOptions::FPEvalMethodKind LastFPEvalMethod = 199 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 200 201 // The most recent pragma location where the floating point evaluation 202 // method was modified. This is used to determine whether the 203 // 'pragma clang fp eval_method' was used whithin the current scope. 204 SourceLocation LastFPEvalPragmaLocation; 205 206 LangOptions::FPEvalMethodKind TUFPEvalMethod = 207 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 208 209 // Next __COUNTER__ value, starts at 0. 210 unsigned CounterValue = 0; 211 212 enum { 213 /// Maximum depth of \#includes. 214 MaxAllowedIncludeStackDepth = 200 215 }; 216 217 // State that is set before the preprocessor begins. 218 bool KeepComments : 1; 219 bool KeepMacroComments : 1; 220 bool SuppressIncludeNotFoundError : 1; 221 222 // State that changes while the preprocessor runs: 223 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 224 225 /// Whether the preprocessor owns the header search object. 226 bool OwnsHeaderSearch : 1; 227 228 /// True if macro expansion is disabled. 229 bool DisableMacroExpansion : 1; 230 231 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 232 /// when parsing preprocessor directives. 233 bool MacroExpansionInDirectivesOverride : 1; 234 235 class ResetMacroExpansionHelper; 236 237 /// Whether we have already loaded macros from the external source. 
238 mutable bool ReadMacrosFromExternalSource : 1; 239 240 /// True if pragmas are enabled. 241 bool PragmasEnabled : 1; 242 243 /// True if the current build action is a preprocessing action. 244 bool PreprocessedOutput : 1; 245 246 /// True if we are currently preprocessing a #if or #elif directive 247 bool ParsingIfOrElifDirective; 248 249 /// True if we are pre-expanding macro arguments. 250 bool InMacroArgPreExpansion; 251 252 /// Mapping/lookup information for all identifiers in 253 /// the program, including program keywords. 254 mutable IdentifierTable Identifiers; 255 256 /// This table contains all the selectors in the program. 257 /// 258 /// Unlike IdentifierTable above, this table *isn't* populated by the 259 /// preprocessor. It is declared/expanded here because its role/lifetime is 260 /// conceptually similar to the IdentifierTable. In addition, the current 261 /// control flow (in clang::ParseAST()), make it convenient to put here. 262 /// 263 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 264 /// the lifetime of the preprocessor. 265 SelectorTable Selectors; 266 267 /// Information about builtins. 268 std::unique_ptr<Builtin::Context> BuiltinInfo; 269 270 /// Tracks all of the pragmas that the client registered 271 /// with this preprocessor. 272 std::unique_ptr<PragmaNamespace> PragmaHandlers; 273 274 /// Pragma handlers of the original source is stored here during the 275 /// parsing of a model file. 276 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 277 278 /// Tracks all of the comment handlers that the client registered 279 /// with this preprocessor. 280 std::vector<CommentHandler *> CommentHandlers; 281 282 /// Empty line handler. 283 EmptylineHandler *Emptyline = nullptr; 284 285 /// True if we want to ignore EOF token and continue later on (thus 286 /// avoid tearing the Lexer and etc. down). 287 bool IncrementalProcessing = false; 288 289 public: 290 /// The kind of translation unit we are processing. 
291 const TranslationUnitKind TUKind; 292 293 private: 294 /// The code-completion handler. 295 CodeCompletionHandler *CodeComplete = nullptr; 296 297 /// The file that we're performing code-completion for, if any. 298 const FileEntry *CodeCompletionFile = nullptr; 299 300 /// The offset in file for the code-completion point. 301 unsigned CodeCompletionOffset = 0; 302 303 /// The location for the code-completion point. This gets instantiated 304 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 305 SourceLocation CodeCompletionLoc; 306 307 /// The start location for the file of the code-completion point. 308 /// 309 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 310 /// for preprocessing. 311 SourceLocation CodeCompletionFileLoc; 312 313 /// The source location of the \c import contextual keyword we just 314 /// lexed, if any. 315 SourceLocation ModuleImportLoc; 316 317 /// The module import path that we're currently processing. 318 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 319 320 /// Whether the last token we lexed was an '@'. 321 bool LastTokenWasAt = false; 322 323 /// A position within a C++20 import-seq. 324 class ImportSeq { 325 public: 326 enum State : int { 327 // Positive values represent a number of unclosed brackets. 328 AtTopLevel = 0, 329 AfterTopLevelTokenSeq = -1, 330 AfterExport = -2, 331 AfterImportSeq = -3, 332 }; 333 334 ImportSeq(State S) : S(S) {} 335 336 /// Saw any kind of open bracket. 337 void handleOpenBracket() { 338 S = static_cast<State>(std::max<int>(S, 0) + 1); 339 } 340 /// Saw any kind of close bracket other than '}'. 341 void handleCloseBracket() { 342 S = static_cast<State>(std::max<int>(S, 1) - 1); 343 } 344 /// Saw a close brace. 345 void handleCloseBrace() { 346 handleCloseBracket(); 347 if (S == AtTopLevel && !AfterHeaderName) 348 S = AfterTopLevelTokenSeq; 349 } 350 /// Saw a semicolon. 
351 void handleSemi() { 352 if (atTopLevel()) { 353 S = AfterTopLevelTokenSeq; 354 AfterHeaderName = false; 355 } 356 } 357 358 /// Saw an 'export' identifier. 359 void handleExport() { 360 if (S == AfterTopLevelTokenSeq) 361 S = AfterExport; 362 else if (S <= 0) 363 S = AtTopLevel; 364 } 365 /// Saw an 'import' identifier. 366 void handleImport() { 367 if (S == AfterTopLevelTokenSeq || S == AfterExport) 368 S = AfterImportSeq; 369 else if (S <= 0) 370 S = AtTopLevel; 371 } 372 373 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 374 /// until we reach a top-level semicolon. 375 void handleHeaderName() { 376 if (S == AfterImportSeq) 377 AfterHeaderName = true; 378 handleMisc(); 379 } 380 381 /// Saw any other token. 382 void handleMisc() { 383 if (S <= 0) 384 S = AtTopLevel; 385 } 386 387 bool atTopLevel() { return S <= 0; } 388 bool afterImportSeq() { return S == AfterImportSeq; } 389 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 390 391 private: 392 State S; 393 /// Whether we're in the pp-import-suffix following the header-name in a 394 /// pp-import. If so, a close-brace is not sufficient to end the 395 /// top-level-token-seq of an import-seq. 396 bool AfterHeaderName = false; 397 }; 398 399 /// Our current position within a C++20 import-seq. 400 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq; 401 402 /// Track whether we are in a Global Module Fragment 403 class TrackGMF { 404 public: 405 enum GMFState : int { 406 GMFActive = 1, 407 MaybeGMF = 0, 408 BeforeGMFIntroducer = -1, 409 GMFAbsentOrEnded = -2, 410 }; 411 412 TrackGMF(GMFState S) : S(S) {} 413 414 /// Saw a semicolon. 415 void handleSemi() { 416 // If it is immediately after the first instance of the module keyword, 417 // then that introduces the GMF. 418 if (S == MaybeGMF) 419 S = GMFActive; 420 } 421 422 /// Saw an 'export' identifier. 423 void handleExport() { 424 // The presence of an 'export' keyword always ends or excludes a GMF. 
425 S = GMFAbsentOrEnded; 426 } 427 428 /// Saw an 'import' identifier. 429 void handleImport(bool AfterTopLevelTokenSeq) { 430 // If we see this before any 'module' kw, then we have no GMF. 431 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 432 S = GMFAbsentOrEnded; 433 } 434 435 /// Saw a 'module' identifier. 436 void handleModule(bool AfterTopLevelTokenSeq) { 437 // This was the first module identifier and not preceded by any token 438 // that would exclude a GMF. It could begin a GMF, but only if directly 439 // followed by a semicolon. 440 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 441 S = MaybeGMF; 442 else 443 S = GMFAbsentOrEnded; 444 } 445 446 /// Saw any other token. 447 void handleMisc() { 448 // We saw something other than ; after the 'module' kw, so not a GMF. 449 if (S == MaybeGMF) 450 S = GMFAbsentOrEnded; 451 } 452 453 bool inGMF() { return S == GMFActive; } 454 455 private: 456 /// Track the transitions into and out of a Global Module Fragment, 457 /// if one is present. 458 GMFState S; 459 }; 460 461 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 462 463 /// Whether the module import expects an identifier next. Otherwise, 464 /// it expects a '.' or ';'. 465 bool ModuleImportExpectsIdentifier = false; 466 467 /// The identifier and source location of the currently-active 468 /// \#pragma clang arc_cf_code_audited begin. 469 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 470 471 /// The source location of the currently-active 472 /// \#pragma clang assume_nonnull begin. 473 SourceLocation PragmaAssumeNonNullLoc; 474 475 /// Set only for preambles which end with an active 476 /// \#pragma clang assume_nonnull begin. 477 /// 478 /// When the preamble is loaded into the main file, 479 /// `PragmaAssumeNonNullLoc` will be set to this to 480 /// replay the unterminated assume_nonnull. 481 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 482 483 /// True if we hit the code-completion point. 
484 bool CodeCompletionReached = false; 485 486 /// The code completion token containing the information 487 /// on the stem that is to be code completed. 488 IdentifierInfo *CodeCompletionII = nullptr; 489 490 /// Range for the code completion token. 491 SourceRange CodeCompletionTokenRange; 492 493 /// The directory that the main file should be considered to occupy, 494 /// if it does not correspond to a real file (as happens when building a 495 /// module). 496 const DirectoryEntry *MainFileDir = nullptr; 497 498 /// The number of bytes that we will initially skip when entering the 499 /// main file, along with a flag that indicates whether skipping this number 500 /// of bytes will place the lexer at the start of a line. 501 /// 502 /// This is used when loading a precompiled preamble. 503 std::pair<int, bool> SkipMainFilePreamble; 504 505 /// Whether we hit an error due to reaching max allowed include depth. Allows 506 /// to avoid hitting the same error over and over again. 507 bool HasReachedMaxIncludeDepth = false; 508 509 /// The number of currently-active calls to Lex. 510 /// 511 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 512 /// require asking for multiple additional tokens. This counter makes it 513 /// possible for Lex to detect whether it's producing a token for the end 514 /// of phase 4 of translation or for some other situation. 515 unsigned LexLevel = 0; 516 517 /// The number of (LexLevel 0) preprocessor tokens. 518 unsigned TokenCount = 0; 519 520 /// Preprocess every token regardless of LexLevel. 521 bool PreprocessToken = false; 522 523 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 524 /// warning, or zero for unlimited. 
525 unsigned MaxTokens = 0; 526 SourceLocation MaxTokensOverrideLoc; 527 528 public: 529 struct PreambleSkipInfo { 530 SourceLocation HashTokenLoc; 531 SourceLocation IfTokenLoc; 532 bool FoundNonSkipPortion; 533 bool FoundElse; 534 SourceLocation ElseLoc; 535 536 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 537 bool FoundNonSkipPortion, bool FoundElse, 538 SourceLocation ElseLoc) 539 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 540 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 541 ElseLoc(ElseLoc) {} 542 }; 543 544 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 545 546 private: 547 friend class ASTReader; 548 friend class MacroArgs; 549 550 class PreambleConditionalStackStore { 551 enum State { 552 Off = 0, 553 Recording = 1, 554 Replaying = 2, 555 }; 556 557 public: 558 PreambleConditionalStackStore() = default; 559 560 void startRecording() { ConditionalStackState = Recording; } 561 void startReplaying() { ConditionalStackState = Replaying; } 562 bool isRecording() const { return ConditionalStackState == Recording; } 563 bool isReplaying() const { return ConditionalStackState == Replaying; } 564 565 ArrayRef<PPConditionalInfo> getStack() const { 566 return ConditionalStack; 567 } 568 569 void doneReplaying() { 570 ConditionalStack.clear(); 571 ConditionalStackState = Off; 572 } 573 574 void setStack(ArrayRef<PPConditionalInfo> s) { 575 if (!isRecording() && !isReplaying()) 576 return; 577 ConditionalStack.clear(); 578 ConditionalStack.append(s.begin(), s.end()); 579 } 580 581 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 582 583 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 584 585 void clearSkipInfo() { SkipInfo.reset(); } 586 587 llvm::Optional<PreambleSkipInfo> SkipInfo; 588 589 private: 590 SmallVector<PPConditionalInfo, 4> ConditionalStack; 591 State ConditionalStackState = Off; 592 } PreambleConditionalStack; 593 594 /// The current top of 
the stack that we're lexing from if 595 /// not expanding a macro and we are lexing directly from source code. 596 /// 597 /// Only one of CurLexer, or CurTokenLexer will be non-null. 598 std::unique_ptr<Lexer> CurLexer; 599 600 /// The current top of the stack what we're lexing from 601 /// if not expanding a macro. 602 /// 603 /// This is an alias for CurLexer. 604 PreprocessorLexer *CurPPLexer = nullptr; 605 606 /// Used to find the current FileEntry, if CurLexer is non-null 607 /// and if applicable. 608 /// 609 /// This allows us to implement \#include_next and find directory-specific 610 /// properties. 611 ConstSearchDirIterator CurDirLookup = nullptr; 612 613 /// The current macro we are expanding, if we are expanding a macro. 614 /// 615 /// One of CurLexer and CurTokenLexer must be null. 616 std::unique_ptr<TokenLexer> CurTokenLexer; 617 618 /// The kind of lexer we're currently working with. 619 enum CurLexerKind { 620 CLK_Lexer, 621 CLK_TokenLexer, 622 CLK_CachingLexer, 623 CLK_DependencyDirectivesLexer, 624 CLK_LexAfterModuleImport 625 } CurLexerKind = CLK_Lexer; 626 627 /// If the current lexer is for a submodule that is being built, this 628 /// is that submodule. 629 Module *CurLexerSubmodule = nullptr; 630 631 /// Keeps track of the stack of files currently 632 /// \#included, and macros currently being expanded from, not counting 633 /// CurLexer/CurTokenLexer. 634 struct IncludeStackInfo { 635 enum CurLexerKind CurLexerKind; 636 Module *TheSubmodule; 637 std::unique_ptr<Lexer> TheLexer; 638 PreprocessorLexer *ThePPLexer; 639 std::unique_ptr<TokenLexer> TheTokenLexer; 640 ConstSearchDirIterator TheDirLookup; 641 642 // The following constructors are completely useless copies of the default 643 // versions, only needed to pacify MSVC. 
644 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 645 std::unique_ptr<Lexer> &&TheLexer, 646 PreprocessorLexer *ThePPLexer, 647 std::unique_ptr<TokenLexer> &&TheTokenLexer, 648 ConstSearchDirIterator TheDirLookup) 649 : CurLexerKind(std::move(CurLexerKind)), 650 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 651 ThePPLexer(std::move(ThePPLexer)), 652 TheTokenLexer(std::move(TheTokenLexer)), 653 TheDirLookup(std::move(TheDirLookup)) {} 654 }; 655 std::vector<IncludeStackInfo> IncludeMacroStack; 656 657 /// Actions invoked when some preprocessor activity is 658 /// encountered (e.g. a file is \#included, etc). 659 std::unique_ptr<PPCallbacks> Callbacks; 660 661 struct MacroExpandsInfo { 662 Token Tok; 663 MacroDefinition MD; 664 SourceRange Range; 665 666 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 667 : Tok(Tok), MD(MD), Range(Range) {} 668 }; 669 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 670 671 /// Information about a name that has been used to define a module macro. 672 struct ModuleMacroInfo { 673 /// The most recent macro directive for this identifier. 674 MacroDirective *MD; 675 676 /// The active module macros for this identifier. 677 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 678 679 /// The generation number at which we last updated ActiveModuleMacros. 680 /// \see Preprocessor::VisibleModules. 681 unsigned ActiveModuleMacrosGeneration = 0; 682 683 /// Whether this macro name is ambiguous. 684 bool IsAmbiguous = false; 685 686 /// The module macros that are overridden by this macro. 687 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 688 689 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 690 }; 691 692 /// The state of a macro for an identifier. 
693 class MacroState { 694 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 695 696 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 697 const IdentifierInfo *II) const { 698 if (II->isOutOfDate()) 699 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 700 // FIXME: Find a spare bit on IdentifierInfo and store a 701 // HasModuleMacros flag. 702 if (!II->hasMacroDefinition() || 703 (!PP.getLangOpts().Modules && 704 !PP.getLangOpts().ModulesLocalVisibility) || 705 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 706 return nullptr; 707 708 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 709 if (!Info) { 710 Info = new (PP.getPreprocessorAllocator()) 711 ModuleMacroInfo(State.get<MacroDirective *>()); 712 State = Info; 713 } 714 715 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 716 Info->ActiveModuleMacrosGeneration) 717 PP.updateModuleMacroInfo(II, *Info); 718 return Info; 719 } 720 721 public: 722 MacroState() : MacroState(nullptr) {} 723 MacroState(MacroDirective *MD) : State(MD) {} 724 725 MacroState(MacroState &&O) noexcept : State(O.State) { 726 O.State = (MacroDirective *)nullptr; 727 } 728 729 MacroState &operator=(MacroState &&O) noexcept { 730 auto S = O.State; 731 O.State = (MacroDirective *)nullptr; 732 State = S; 733 return *this; 734 } 735 736 ~MacroState() { 737 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 738 Info->~ModuleMacroInfo(); 739 } 740 741 MacroDirective *getLatest() const { 742 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 743 return Info->MD; 744 return State.get<MacroDirective*>(); 745 } 746 747 void setLatest(MacroDirective *MD) { 748 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 749 Info->MD = MD; 750 else 751 State = MD; 752 } 753 754 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 755 auto *Info = getModuleInfo(PP, II); 756 return Info ? 
Info->IsAmbiguous : false; 757 } 758 759 ArrayRef<ModuleMacro *> 760 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 761 if (auto *Info = getModuleInfo(PP, II)) 762 return Info->ActiveModuleMacros; 763 return None; 764 } 765 766 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 767 SourceManager &SourceMgr) const { 768 // FIXME: Incorporate module macros into the result of this. 769 if (auto *Latest = getLatest()) 770 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 771 return {}; 772 } 773 774 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 775 if (auto *Info = getModuleInfo(PP, II)) { 776 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 777 Info->ActiveModuleMacros.begin(), 778 Info->ActiveModuleMacros.end()); 779 Info->ActiveModuleMacros.clear(); 780 Info->IsAmbiguous = false; 781 } 782 } 783 784 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 785 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 786 return Info->OverriddenMacros; 787 return None; 788 } 789 790 void setOverriddenMacros(Preprocessor &PP, 791 ArrayRef<ModuleMacro *> Overrides) { 792 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 793 if (!Info) { 794 if (Overrides.empty()) 795 return; 796 Info = new (PP.getPreprocessorAllocator()) 797 ModuleMacroInfo(State.get<MacroDirective *>()); 798 State = Info; 799 } 800 Info->OverriddenMacros.clear(); 801 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 802 Overrides.begin(), Overrides.end()); 803 Info->ActiveModuleMacrosGeneration = 0; 804 } 805 }; 806 807 /// For each IdentifierInfo that was associated with a macro, we 808 /// keep a mapping to the history of all macro definitions and #undefs in 809 /// the reverse order (the latest one is in the head of the list). 810 /// 811 /// This mapping lives within the \p CurSubmoduleState. 
812 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 813 814 struct SubmoduleState; 815 816 /// Information about a submodule that we're currently building. 817 struct BuildingSubmoduleInfo { 818 /// The module that we are building. 819 Module *M; 820 821 /// The location at which the module was included. 822 SourceLocation ImportLoc; 823 824 /// Whether we entered this submodule via a pragma. 825 bool IsPragma; 826 827 /// The previous SubmoduleState. 828 SubmoduleState *OuterSubmoduleState; 829 830 /// The number of pending module macro names when we started building this. 831 unsigned OuterPendingModuleMacroNames; 832 833 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 834 SubmoduleState *OuterSubmoduleState, 835 unsigned OuterPendingModuleMacroNames) 836 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 837 OuterSubmoduleState(OuterSubmoduleState), 838 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 839 }; 840 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 841 842 /// Information about a submodule's preprocessor state. 843 struct SubmoduleState { 844 /// The macros for the submodule. 845 MacroMap Macros; 846 847 /// The set of modules that are visible within the submodule. 848 VisibleModuleSet VisibleModules; 849 850 // FIXME: CounterValue? 851 // FIXME: PragmaPushMacroInfo? 852 }; 853 std::map<Module *, SubmoduleState> Submodules; 854 855 /// The preprocessor state for preprocessing outside of any submodule. 856 SubmoduleState NullSubmoduleState; 857 858 /// The current submodule state. Will be \p NullSubmoduleState if we're not 859 /// in a submodule. 860 SubmoduleState *CurSubmoduleState; 861 862 /// The files that have been included. 863 IncludedFilesSet IncludedFiles; 864 865 /// The set of known macros exported from modules. 866 llvm::FoldingSet<ModuleMacro> ModuleMacros; 867 868 /// The names of potential module macros that we've not yet processed. 
869 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames; 870 871 /// The list of module macros, for each identifier, that are not overridden by 872 /// any other module macro. 873 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 874 LeafModuleMacros; 875 876 /// Macros that we want to warn because they are not used at the end 877 /// of the translation unit. 878 /// 879 /// We store just their SourceLocations instead of 880 /// something like MacroInfo*. The benefit of this is that when we are 881 /// deserializing from PCH, we don't need to deserialize identifier & macros 882 /// just so that we can report that they are unused, we just warn using 883 /// the SourceLocations of this set (that will be filled by the ASTReader). 884 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 885 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 886 887 /// This is a pair of an optional message and source location used for pragmas 888 /// that annotate macros like pragma clang restrict_expansion and pragma clang 889 /// deprecated. This pair stores the optional message and the location of the 890 /// annotation pragma for use producing diagnostics and notes. 
891 using MsgLocationPair = std::pair<std::string, SourceLocation>; 892 893 struct MacroAnnotationInfo { 894 SourceLocation Location; 895 std::string Message; 896 }; 897 898 struct MacroAnnotations { 899 llvm::Optional<MacroAnnotationInfo> DeprecationInfo; 900 llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo; 901 llvm::Optional<SourceLocation> FinalAnnotationLoc; 902 903 static MacroAnnotations makeDeprecation(SourceLocation Loc, 904 std::string Msg) { 905 return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)}, 906 llvm::None, llvm::None}; 907 } 908 909 static MacroAnnotations makeRestrictExpansion(SourceLocation Loc, 910 std::string Msg) { 911 return MacroAnnotations{ 912 llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None}; 913 } 914 915 static MacroAnnotations makeFinal(SourceLocation Loc) { 916 return MacroAnnotations{llvm::None, llvm::None, Loc}; 917 } 918 }; 919 920 /// Warning information for macro annotations. 921 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 922 923 /// A "freelist" of MacroArg objects that can be 924 /// reused for quick allocation. 925 MacroArgs *MacroArgCache = nullptr; 926 927 /// For each IdentifierInfo used in a \#pragma push_macro directive, 928 /// we keep a MacroInfo stack used to restore the previous macro value. 929 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 930 PragmaPushMacroInfo; 931 932 // Various statistics we track for performance analysis. 
933 unsigned NumDirectives = 0; 934 unsigned NumDefined = 0; 935 unsigned NumUndefined = 0; 936 unsigned NumPragma = 0; 937 unsigned NumIf = 0; 938 unsigned NumElse = 0; 939 unsigned NumEndif = 0; 940 unsigned NumEnteredSourceFiles = 0; 941 unsigned MaxIncludeStackDepth = 0; 942 unsigned NumMacroExpanded = 0; 943 unsigned NumFnMacroExpanded = 0; 944 unsigned NumBuiltinMacroExpanded = 0; 945 unsigned NumFastMacroExpanded = 0; 946 unsigned NumTokenPaste = 0; 947 unsigned NumFastTokenPaste = 0; 948 unsigned NumSkipped = 0; 949 950 /// The predefined macros that preprocessor should use from the 951 /// command line etc. 952 std::string Predefines; 953 954 /// The file ID for the preprocessor predefines. 955 FileID PredefinesFileID; 956 957 /// The file ID for the PCH through header. 958 FileID PCHThroughHeaderFileID; 959 960 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 961 bool SkippingUntilPragmaHdrStop = false; 962 963 /// Whether tokens are being skipped until the through header is seen. 964 bool SkippingUntilPCHThroughHeader = false; 965 966 /// \{ 967 /// Cache of macro expanders to reduce malloc traffic. 968 enum { TokenLexerCacheSize = 8 }; 969 unsigned NumCachedTokenLexers; 970 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 971 /// \} 972 973 /// Keeps macro expanded tokens for TokenLexers. 974 // 975 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 976 /// going to lex in the cache and when it finishes the tokens are removed 977 /// from the end of the cache. 978 SmallVector<Token, 16> MacroExpandedTokens; 979 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 980 981 /// A record of the macro definitions and expansions that 982 /// occurred during preprocessing. 983 /// 984 /// This is an optional side structure that can be enabled with 985 /// \c createPreprocessingRecord() prior to preprocessing. 
PreprocessingRecord *Record = nullptr;

/// Cached tokens state.
using CachedTokensTy = SmallVector<Token, 1>;

/// Cached tokens are stored here when we do backtracking or
/// lookahead. They are "lexed" by the CachingLex() method.
CachedTokensTy CachedTokens;

/// The position of the cached token that CachingLex() should
/// "lex" next.
///
/// If it points beyond the CachedTokens vector, it means that a normal
/// Lex() should be invoked.
CachedTokensTy::size_type CachedLexPos = 0;

/// Stack of backtrack positions, allowing nested backtracks.
///
/// The EnableBacktrackAtThisPos() method pushes a position to
/// indicate where CachedLexPos should be set when the Backtrack() method is
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;

/// A MacroInfo together with a pointer to the next element of the chain.
struct MacroInfoChain {
  MacroInfo MI;
  MacroInfoChain *Next;
};

/// MacroInfos are managed as a chain for easy disposal. This is the head
/// of that list.
MacroInfoChain *MIChainHead = nullptr;

/// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
/// This is used to guard against calling this function recursively.
///
/// See comments at the use-site for more context about why it is needed.
bool SkippingExcludedConditionalBlock = false;

/// Keeps track of skipped range mappings that were recorded while skipping
/// excluded conditional directives. It maps the source buffer pointer at
/// the beginning of a skipped block, to the number of bytes that should be
/// skipped.
llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;

void updateOutOfDateIdentifier(IdentifierInfo &II) const;

public:
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
             DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
             HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
             IdentifierInfoLookup *IILookup = nullptr,
             bool OwnsHeaderSearch = false,
             TranslationUnitKind TUKind = TU_Complete);

~Preprocessor();

/// Initialize the preprocessor using information about the target.
///
/// \param Target is owned by the caller and must remain valid for the
/// lifetime of the preprocessor.
/// \param AuxTarget is owned by the caller and must remain valid for
/// the lifetime of the preprocessor.
void Initialize(const TargetInfo &Target,
                const TargetInfo *AuxTarget = nullptr);

/// Initialize the preprocessor to parse a model file.
///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However, to avoid some duplicate
/// information in the preprocessor, some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
void InitializeForModelFile();

/// Clean up after model file parsing.
void FinalizeForModelFile();

/// Retrieve the preprocessor options used to initialize this
/// preprocessor.
1064 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 1065 1066 DiagnosticsEngine &getDiagnostics() const { return *Diags; } 1067 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1068 1069 const LangOptions &getLangOpts() const { return LangOpts; } 1070 const TargetInfo &getTargetInfo() const { return *Target; } 1071 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } 1072 FileManager &getFileManager() const { return FileMgr; } 1073 SourceManager &getSourceManager() const { return SourceMgr; } 1074 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1075 1076 IdentifierTable &getIdentifierTable() { return Identifiers; } 1077 const IdentifierTable &getIdentifierTable() const { return Identifiers; } 1078 SelectorTable &getSelectorTable() { return Selectors; } 1079 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } 1080 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1081 1082 void setExternalSource(ExternalPreprocessorSource *Source) { 1083 ExternalSource = Source; 1084 } 1085 1086 ExternalPreprocessorSource *getExternalSource() const { 1087 return ExternalSource; 1088 } 1089 1090 /// Retrieve the module loader associated with this preprocessor. 1091 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1092 1093 bool hadModuleLoaderFatalFailure() const { 1094 return TheModuleLoader.HadFatalFailure; 1095 } 1096 1097 /// Retrieve the number of Directives that have been processed by the 1098 /// Preprocessor. 1099 unsigned getNumDirectives() const { 1100 return NumDirectives; 1101 } 1102 1103 /// True if we are currently preprocessing a #if or #elif directive 1104 bool isParsingIfOrElifDirective() const { 1105 return ParsingIfOrElifDirective; 1106 } 1107 1108 /// Control whether the preprocessor retains comments in output. 
1109 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1110 this->KeepComments = KeepComments | KeepMacroComments; 1111 this->KeepMacroComments = KeepMacroComments; 1112 } 1113 1114 bool getCommentRetentionState() const { return KeepComments; } 1115 1116 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } 1117 bool getPragmasEnabled() const { return PragmasEnabled; } 1118 1119 void SetSuppressIncludeNotFoundError(bool Suppress) { 1120 SuppressIncludeNotFoundError = Suppress; 1121 } 1122 1123 bool GetSuppressIncludeNotFoundError() { 1124 return SuppressIncludeNotFoundError; 1125 } 1126 1127 /// Sets whether the preprocessor is responsible for producing output or if 1128 /// it is producing tokens to be consumed by Parse and Sema. 1129 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1130 PreprocessedOutput = IsPreprocessedOutput; 1131 } 1132 1133 /// Returns true if the preprocessor is responsible for generating output, 1134 /// false if it is producing tokens to be consumed by Parse and Sema. 1135 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1136 1137 /// Return true if we are lexing directly from the specified lexer. 1138 bool isCurrentLexer(const PreprocessorLexer *L) const { 1139 return CurPPLexer == L; 1140 } 1141 1142 /// Return the current lexer being lexed from. 1143 /// 1144 /// Note that this ignores any potentially active macro expansions and _Pragma 1145 /// expansions going on at the time. 1146 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1147 1148 /// Return the current file lexer being lexed from. 1149 /// 1150 /// Note that this ignores any potentially active macro expansions and _Pragma 1151 /// expansions going on at the time. 1152 PreprocessorLexer *getCurrentFileLexer() const; 1153 1154 /// Return the submodule owning the file being lexed. This may not be 1155 /// the current module if we have changed modules since entering the file. 
1156 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1157 1158 /// Returns the FileID for the preprocessor predefines. 1159 FileID getPredefinesFileID() const { return PredefinesFileID; } 1160 1161 /// \{ 1162 /// Accessors for preprocessor callbacks. 1163 /// 1164 /// Note that this class takes ownership of any PPCallbacks object given to 1165 /// it. 1166 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } 1167 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1168 if (Callbacks) 1169 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1170 std::move(Callbacks)); 1171 Callbacks = std::move(C); 1172 } 1173 /// \} 1174 1175 /// Get the number of tokens processed so far. 1176 unsigned getTokenCount() const { return TokenCount; } 1177 1178 /// Get the max number of tokens before issuing a -Wmax-tokens warning. 1179 unsigned getMaxTokens() const { return MaxTokens; } 1180 1181 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1182 MaxTokens = Value; 1183 MaxTokensOverrideLoc = Loc; 1184 }; 1185 1186 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1187 1188 /// Register a function that would be called on each token in the final 1189 /// expanded token stream. 1190 /// This also reports annotation tokens produced by the parser. 1191 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1192 OnToken = std::move(F); 1193 } 1194 1195 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1196 1197 bool isMacroDefined(StringRef Id) { 1198 return isMacroDefined(&Identifiers.get(Id)); 1199 } 1200 bool isMacroDefined(const IdentifierInfo *II) { 1201 return II->hasMacroDefinition() && 1202 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1203 } 1204 1205 /// Determine whether II is defined as a macro within the module M, 1206 /// if that is a module that we've already preprocessed. Does not check for 1207 /// macros imported into M. 
1208 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1209 if (!II->hasMacroDefinition()) 1210 return false; 1211 auto I = Submodules.find(M); 1212 if (I == Submodules.end()) 1213 return false; 1214 auto J = I->second.Macros.find(II); 1215 if (J == I->second.Macros.end()) 1216 return false; 1217 auto *MD = J->second.getLatest(); 1218 return MD && MD->isDefined(); 1219 } 1220 1221 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1222 if (!II->hasMacroDefinition()) 1223 return {}; 1224 1225 MacroState &S = CurSubmoduleState->Macros[II]; 1226 auto *MD = S.getLatest(); 1227 while (MD && isa<VisibilityMacroDirective>(MD)) 1228 MD = MD->getPrevious(); 1229 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1230 S.getActiveModuleMacros(*this, II), 1231 S.isAmbiguous(*this, II)); 1232 } 1233 1234 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1235 SourceLocation Loc) { 1236 if (!II->hadMacroDefinition()) 1237 return {}; 1238 1239 MacroState &S = CurSubmoduleState->Macros[II]; 1240 MacroDirective::DefInfo DI; 1241 if (auto *MD = S.getLatest()) 1242 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1243 // FIXME: Compute the set of active module macros at the specified location. 1244 return MacroDefinition(DI.getDirective(), 1245 S.getActiveModuleMacros(*this, II), 1246 S.isAmbiguous(*this, II)); 1247 } 1248 1249 /// Given an identifier, return its latest non-imported MacroDirective 1250 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
1251 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1252 if (!II->hasMacroDefinition()) 1253 return nullptr; 1254 1255 auto *MD = getLocalMacroDirectiveHistory(II); 1256 if (!MD || MD->getDefinition().isUndefined()) 1257 return nullptr; 1258 1259 return MD; 1260 } 1261 1262 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1263 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1264 } 1265 1266 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1267 if (!II->hasMacroDefinition()) 1268 return nullptr; 1269 if (auto MD = getMacroDefinition(II)) 1270 return MD.getMacroInfo(); 1271 return nullptr; 1272 } 1273 1274 /// Given an identifier, return the latest non-imported macro 1275 /// directive for that identifier. 1276 /// 1277 /// One can iterate over all previous macro directives from the most recent 1278 /// one. 1279 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1280 1281 /// Add a directive to the macro directive history for this identifier. 1282 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); 1283 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1284 SourceLocation Loc) { 1285 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1286 appendMacroDirective(II, MD); 1287 return MD; 1288 } 1289 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1290 MacroInfo *MI) { 1291 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1292 } 1293 1294 /// Set a MacroDirective that was loaded from a PCH file. 1295 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1296 MacroDirective *MD); 1297 1298 /// Register an exported macro for a module and identifier. 
1299 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 1300 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1301 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1302 1303 /// Get the list of leaf (non-overridden) module macros for a name. 1304 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1305 if (II->isOutOfDate()) 1306 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 1307 auto I = LeafModuleMacros.find(II); 1308 if (I != LeafModuleMacros.end()) 1309 return I->second; 1310 return None; 1311 } 1312 1313 /// Get the list of submodules that we're currently building. 1314 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1315 return BuildingSubmoduleStack; 1316 } 1317 1318 /// \{ 1319 /// Iterators for the macro history table. Currently defined macros have 1320 /// IdentifierInfo::hasMacroDefinition() set and an empty 1321 /// MacroInfo::getUndefLoc() at the head of the list. 1322 using macro_iterator = MacroMap::const_iterator; 1323 1324 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1325 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1326 1327 llvm::iterator_range<macro_iterator> 1328 macros(bool IncludeExternalMacros = true) const { 1329 macro_iterator begin = macro_begin(IncludeExternalMacros); 1330 macro_iterator end = macro_end(IncludeExternalMacros); 1331 return llvm::make_range(begin, end); 1332 } 1333 1334 /// \} 1335 1336 /// Mark the file as included. 1337 /// Returns true if this is the first time the file was included. 1338 bool markIncluded(const FileEntry *File) { 1339 HeaderInfo.getFileInfo(File); 1340 return IncludedFiles.insert(File).second; 1341 } 1342 1343 /// Return true if this header has already been included. 1344 bool alreadyIncluded(const FileEntry *File) const { 1345 return IncludedFiles.count(File); 1346 } 1347 1348 /// Get the set of included files. 
1349 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } 1350 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1351 1352 /// Return the name of the macro defined before \p Loc that has 1353 /// spelling \p Tokens. If there are multiple macros with same spelling, 1354 /// return the last one defined. 1355 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1356 ArrayRef<TokenValue> Tokens) const; 1357 1358 /// Get the predefines for this processor. 1359 /// Used by some third-party tools to inspect and add predefines (see 1360 /// https://github.com/llvm/llvm-project/issues/57483). 1361 const std::string &getPredefines() const { return Predefines; } 1362 1363 /// Set the predefines for this Preprocessor. 1364 /// 1365 /// These predefines are automatically injected when parsing the main file. 1366 void setPredefines(std::string P) { Predefines = std::move(P); } 1367 1368 /// Return information about the specified preprocessor 1369 /// identifier token. 1370 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1371 return &Identifiers.get(Name); 1372 } 1373 1374 /// Add the specified pragma handler to this preprocessor. 1375 /// 1376 /// If \p Namespace is non-null, then it is a token required to exist on the 1377 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1378 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1379 void AddPragmaHandler(PragmaHandler *Handler) { 1380 AddPragmaHandler(StringRef(), Handler); 1381 } 1382 1383 /// Remove the specific pragma handler from this preprocessor. 1384 /// 1385 /// If \p Namespace is non-null, then it should be the namespace that 1386 /// \p Handler was added to. It is an error to remove a handler that 1387 /// has not been registered. 
1388 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1389 void RemovePragmaHandler(PragmaHandler *Handler) { 1390 RemovePragmaHandler(StringRef(), Handler); 1391 } 1392 1393 /// Install empty handlers for all pragmas (making them ignored). 1394 void IgnorePragmas(); 1395 1396 /// Set empty line handler. 1397 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1398 1399 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1400 1401 /// Add the specified comment handler to the preprocessor. 1402 void addCommentHandler(CommentHandler *Handler); 1403 1404 /// Remove the specified comment handler. 1405 /// 1406 /// It is an error to remove a handler that has not been registered. 1407 void removeCommentHandler(CommentHandler *Handler); 1408 1409 /// Set the code completion handler to the given object. 1410 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1411 CodeComplete = &Handler; 1412 } 1413 1414 /// Retrieve the current code-completion handler. 1415 CodeCompletionHandler *getCodeCompletionHandler() const { 1416 return CodeComplete; 1417 } 1418 1419 /// Clear out the code completion handler. 1420 void clearCodeCompletionHandler() { 1421 CodeComplete = nullptr; 1422 } 1423 1424 /// Hook used by the lexer to invoke the "included file" code 1425 /// completion point. 1426 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1427 1428 /// Hook used by the lexer to invoke the "natural language" code 1429 /// completion point. 1430 void CodeCompleteNaturalLanguage(); 1431 1432 /// Set the code completion token for filtering purposes. 1433 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1434 CodeCompletionII = Filter; 1435 } 1436 1437 /// Set the code completion token range for detecting replacement range later 1438 /// on. 
1439 void setCodeCompletionTokenRange(const SourceLocation Start, 1440 const SourceLocation End) { 1441 CodeCompletionTokenRange = {Start, End}; 1442 } 1443 SourceRange getCodeCompletionTokenRange() const { 1444 return CodeCompletionTokenRange; 1445 } 1446 1447 /// Get the code completion token for filtering purposes. 1448 StringRef getCodeCompletionFilter() { 1449 if (CodeCompletionII) 1450 return CodeCompletionII->getName(); 1451 return {}; 1452 } 1453 1454 /// Retrieve the preprocessing record, or NULL if there is no 1455 /// preprocessing record. 1456 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1457 1458 /// Create a new preprocessing record, which will keep track of 1459 /// all macro expansions, macro definitions, etc. 1460 void createPreprocessingRecord(); 1461 1462 /// Returns true if the FileEntry is the PCH through header. 1463 bool isPCHThroughHeader(const FileEntry *FE); 1464 1465 /// True if creating a PCH with a through header. 1466 bool creatingPCHWithThroughHeader(); 1467 1468 /// True if using a PCH with a through header. 1469 bool usingPCHWithThroughHeader(); 1470 1471 /// True if creating a PCH with a #pragma hdrstop. 1472 bool creatingPCHWithPragmaHdrStop(); 1473 1474 /// True if using a PCH with a #pragma hdrstop. 1475 bool usingPCHWithPragmaHdrStop(); 1476 1477 /// Skip tokens until after the #include of the through header or 1478 /// until after a #pragma hdrstop. 1479 void SkipTokensWhileUsingPCH(); 1480 1481 /// Process directives while skipping until the through header or 1482 /// #pragma hdrstop is found. 1483 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1484 SourceLocation HashLoc); 1485 1486 /// Enter the specified FileID as the main source file, 1487 /// which implicitly adds the builtin defines etc. 1488 void EnterMainSourceFile(); 1489 1490 /// Inform the preprocessor callbacks that processing is complete. 
void EndSourceFile();

/// Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
///
/// Emits a diagnostic, doesn't enter the file, and returns true on error.
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
                     SourceLocation Loc, bool IsFirstIncludeOfFile = true);

/// Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
///
/// \param Args specifies the tokens input to a function-like macro.
/// \param ILEnd specifies the location of the ')' for a function-like macro
/// or the identifier for an object-like macro.
void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
                MacroArgs *Args);

private:
/// Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If \p DisableMacroExpansion is true, tokens lexed from the token stream
/// will not be subject to further macro expansion. Otherwise, these tokens
/// will be re-macro-expanded when/if expansion is enabled.
///
/// If \p OwnsTokens is false, this method assumes that the specified stream
/// of tokens has a permanent owner somewhere, so they do not need to be
/// copied. If it is true, it assumes the array of tokens is allocated with
/// \c new[] and the Preprocessor will delete[] it.
///
/// If \p IsReinject is true, the resulting tokens will have the
/// Token::IsReinjected flag set; see the flag documentation for details.
1524 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1525 bool DisableMacroExpansion, bool OwnsTokens, 1526 bool IsReinject); 1527 1528 public: 1529 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1530 bool DisableMacroExpansion, bool IsReinject) { 1531 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1532 IsReinject); 1533 } 1534 1535 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1536 bool IsReinject) { 1537 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1538 IsReinject); 1539 } 1540 1541 /// Pop the current lexer/macro exp off the top of the lexer stack. 1542 /// 1543 /// This should only be used in situations where the current state of the 1544 /// top-of-stack lexer is known. 1545 void RemoveTopOfLexerStack(); 1546 1547 /// From the point that this method is called, and until 1548 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1549 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1550 /// make the Preprocessor re-lex the same tokens. 1551 /// 1552 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1553 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1554 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1555 /// 1556 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1557 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1558 /// tokens will continue indefinitely. 1559 /// 1560 void EnableBacktrackAtThisPos(); 1561 1562 /// Disable the last EnableBacktrackAtThisPos call. 1563 void CommitBacktrackedTokens(); 1564 1565 /// Make Preprocessor re-lex the tokens that were lexed since 1566 /// EnableBacktrackAtThisPos() was previously called. 1567 void Backtrack(); 1568 1569 /// True if EnableBacktrackAtThisPos() was called and 1570 /// caching of tokens is on. 
1571 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1572 1573 /// Lex the next token for this preprocessor. 1574 void Lex(Token &Result); 1575 1576 /// Lex a token, forming a header-name token if possible. 1577 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1578 1579 bool LexAfterModuleImport(Token &Result); 1580 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1581 1582 void makeModuleVisible(Module *M, SourceLocation Loc); 1583 1584 SourceLocation getModuleImportLoc(Module *M) const { 1585 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1586 } 1587 1588 /// Lex a string literal, which may be the concatenation of multiple 1589 /// string literals and may even come from macro expansion. 1590 /// \returns true on success, false if a error diagnostic has been generated. 1591 bool LexStringLiteral(Token &Result, std::string &String, 1592 const char *DiagnosticTag, bool AllowMacroExpansion) { 1593 if (AllowMacroExpansion) 1594 Lex(Result); 1595 else 1596 LexUnexpandedToken(Result); 1597 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1598 AllowMacroExpansion); 1599 } 1600 1601 /// Complete the lexing of a string literal where the first token has 1602 /// already been lexed (see LexStringLiteral). 1603 bool FinishLexStringLiteral(Token &Result, std::string &String, 1604 const char *DiagnosticTag, 1605 bool AllowMacroExpansion); 1606 1607 /// Lex a token. If it's a comment, keep lexing until we get 1608 /// something not a comment. 1609 /// 1610 /// This is useful in -E -C mode where comments would foul up preprocessor 1611 /// directive handling. 1612 void LexNonComment(Token &Result) { 1613 do 1614 Lex(Result); 1615 while (Result.getKind() == tok::comment); 1616 } 1617 1618 /// Just like Lex, but disables macro expansion of identifier tokens. 1619 void LexUnexpandedToken(Token &Result) { 1620 // Disable macro expansion. 
1621 bool OldVal = DisableMacroExpansion; 1622 DisableMacroExpansion = true; 1623 // Lex the token. 1624 Lex(Result); 1625 1626 // Reenable it. 1627 DisableMacroExpansion = OldVal; 1628 } 1629 1630 /// Like LexNonComment, but this disables macro expansion of 1631 /// identifier tokens. 1632 void LexUnexpandedNonComment(Token &Result) { 1633 do 1634 LexUnexpandedToken(Result); 1635 while (Result.getKind() == tok::comment); 1636 } 1637 1638 /// Parses a simple integer literal to get its numeric value. Floating 1639 /// point literals and user defined literals are rejected. Used primarily to 1640 /// handle pragmas that accept integer arguments. 1641 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1642 1643 /// Disables macro expansion everywhere except for preprocessor directives. 1644 void SetMacroExpansionOnlyInDirectives() { 1645 DisableMacroExpansion = true; 1646 MacroExpansionInDirectivesOverride = true; 1647 } 1648 1649 /// Peeks ahead N tokens and returns that token without consuming any 1650 /// tokens. 1651 /// 1652 /// LookAhead(0) returns the next token that would be returned by Lex(), 1653 /// LookAhead(1) returns the token after it, etc. This returns normal 1654 /// tokens after phase 5. As such, it is equivalent to using 1655 /// 'Lex', not 'LexUnexpandedToken'. 1656 const Token &LookAhead(unsigned N) { 1657 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1658 if (CachedLexPos + N < CachedTokens.size()) 1659 return CachedTokens[CachedLexPos+N]; 1660 else 1661 return PeekAhead(N+1); 1662 } 1663 1664 /// When backtracking is enabled and tokens are cached, 1665 /// this allows to revert a specific number of tokens. 1666 /// 1667 /// Note that the number of tokens being reverted should be up to the last 1668 /// backtrack position, not more. 
1669 void RevertCachedTokens(unsigned N) { 1670 assert(isBacktrackEnabled() && 1671 "Should only be called when tokens are cached for backtracking"); 1672 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1673 && "Should revert tokens up to the last backtrack position, not more"); 1674 assert(signed(CachedLexPos) - signed(N) >= 0 && 1675 "Corrupted backtrack positions ?"); 1676 CachedLexPos -= N; 1677 } 1678 1679 /// Enters a token in the token stream to be lexed next. 1680 /// 1681 /// If BackTrack() is called afterwards, the token will remain at the 1682 /// insertion point. 1683 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1684 /// flag set. See the flag documentation for details. 1685 void EnterToken(const Token &Tok, bool IsReinject) { 1686 if (LexLevel) { 1687 // It's not correct in general to enter caching lex mode while in the 1688 // middle of a nested lexing action. 1689 auto TokCopy = std::make_unique<Token[]>(1); 1690 TokCopy[0] = Tok; 1691 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1692 } else { 1693 EnterCachingLexMode(); 1694 assert(IsReinject && "new tokens in the middle of cached stream"); 1695 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1696 } 1697 } 1698 1699 /// We notify the Preprocessor that if it is caching tokens (because 1700 /// backtrack is enabled) it should replace the most recent cached tokens 1701 /// with the given annotation token. This function has no effect if 1702 /// backtracking is not enabled. 1703 /// 1704 /// Note that the use of this function is just for optimization, so that the 1705 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1706 /// invoked. 
1707 void AnnotateCachedTokens(const Token &Tok) { 1708 assert(Tok.isAnnotation() && "Expected annotation token"); 1709 if (CachedLexPos != 0 && isBacktrackEnabled()) 1710 AnnotatePreviousCachedTokens(Tok); 1711 } 1712 1713 /// Get the location of the last cached token, suitable for setting the end 1714 /// location of an annotation token. 1715 SourceLocation getLastCachedTokenLocation() const { 1716 assert(CachedLexPos != 0); 1717 return CachedTokens[CachedLexPos-1].getLastLoc(); 1718 } 1719 1720 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1721 /// CachedTokens. 1722 bool IsPreviousCachedToken(const Token &Tok) const; 1723 1724 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1725 /// in \p NewToks. 1726 /// 1727 /// Useful when a token needs to be split in smaller ones and CachedTokens 1728 /// most recent token must to be updated to reflect that. 1729 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1730 1731 /// Replace the last token with an annotation token. 1732 /// 1733 /// Like AnnotateCachedTokens(), this routine replaces an 1734 /// already-parsed (and resolved) token with an annotation 1735 /// token. However, this routine only replaces the last token with 1736 /// the annotation token; it does not affect any other cached 1737 /// tokens. This function has no effect if backtracking is not 1738 /// enabled. 1739 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1740 assert(Tok.isAnnotation() && "Expected annotation token"); 1741 if (CachedLexPos != 0 && isBacktrackEnabled()) 1742 CachedTokens[CachedLexPos-1] = Tok; 1743 } 1744 1745 /// Enter an annotation token into the token stream. 1746 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1747 void *AnnotationVal); 1748 1749 /// Determine whether it's possible for a future call to Lex to produce an 1750 /// annotation token created by a previous call to EnterAnnotationToken. 
1751 bool mightHavePendingAnnotationTokens() { 1752 return CurLexerKind != CLK_Lexer; 1753 } 1754 1755 /// Update the current token to represent the provided 1756 /// identifier, in order to cache an action performed by typo correction. 1757 void TypoCorrectToken(const Token &Tok) { 1758 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1759 if (CachedLexPos != 0 && isBacktrackEnabled()) 1760 CachedTokens[CachedLexPos-1] = Tok; 1761 } 1762 1763 /// Recompute the current lexer kind based on the CurLexer/ 1764 /// CurTokenLexer pointers. 1765 void recomputeCurLexerKind(); 1766 1767 /// Returns true if incremental processing is enabled 1768 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1769 1770 /// Enables the incremental processing 1771 void enableIncrementalProcessing(bool value = true) { 1772 IncrementalProcessing = value; 1773 } 1774 1775 /// Specify the point at which code-completion will be performed. 1776 /// 1777 /// \param File the file in which code completion should occur. If 1778 /// this file is included multiple times, code-completion will 1779 /// perform completion the first time it is included. If NULL, this 1780 /// function clears out the code-completion point. 1781 /// 1782 /// \param Line the line at which code completion should occur 1783 /// (1-based). 1784 /// 1785 /// \param Column the column at which code completion should occur 1786 /// (1-based). 1787 /// 1788 /// \returns true if an error occurred, false otherwise. 1789 bool SetCodeCompletionPoint(const FileEntry *File, 1790 unsigned Line, unsigned Column); 1791 1792 /// Determine if we are performing code completion. 1793 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1794 1795 /// Returns the location of the code-completion point. 1796 /// 1797 /// Returns an invalid location if code-completion is not enabled or the file 1798 /// containing the code-completion point has not been lexed yet. 
SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }

/// Returns the start location of the file of code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionFileLoc() const {
  return CodeCompletionFileLoc;
}

/// Returns true if code-completion is enabled and we have hit the
/// code-completion point.
bool isCodeCompletionReached() const { return CodeCompletionReached; }

/// Note that we hit the code-completion point.
///
/// Must only be called while code completion is enabled. As a side effect,
/// all further diagnostics are suppressed.
void setCodeCompletionReached() {
  assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
  CodeCompletionReached = true;
  // Silence any diagnostics that occur after we hit the code-completion.
  getDiagnostics().setSuppressAllDiagnostics(true);
}

/// The location of the currently-active \#pragma clang
/// arc_cf_code_audited begin, paired with the identifier recorded alongside
/// it by setPragmaARCCFCodeAuditedInfo().
///
/// Returns an invalid location if there is no such pragma active.
std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo() const {
  return PragmaARCCFCodeAuditedInfo;
}

/// Set the location of the currently-active \#pragma clang
/// arc_cf_code_audited begin. An invalid location ends the pragma.
void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
                                   SourceLocation Loc) {
  PragmaARCCFCodeAuditedInfo = {Ident, Loc};
}

/// The location of the currently-active \#pragma clang
/// assume_nonnull begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaAssumeNonNullLoc() const {
  return PragmaAssumeNonNullLoc;
}

/// Set the location of the currently-active \#pragma clang
/// assume_nonnull begin. An invalid location ends the pragma.
void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
  PragmaAssumeNonNullLoc = Loc;
}

/// Get the location of the recorded unterminated \#pragma clang
/// assume_nonnull begin in the preamble, if one exists.
///
/// Returns an invalid location if the preamble did not end with
/// such a pragma active or if there is no recorded preamble.
SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
  return PreambleRecordedPragmaAssumeNonNullLoc;
}

/// Record the location of the unterminated \#pragma clang
/// assume_nonnull begin in the preamble.
void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
  PreambleRecordedPragmaAssumeNonNullLoc = Loc;
}

/// Set the directory in which the main file should be considered
/// to have been found, if it is not a real file.
void setMainFileDir(const DirectoryEntry *Dir) {
  MainFileDir = Dir;
}

/// Instruct the preprocessor to skip part of the main source file.
///
/// \param Bytes The number of bytes in the preamble to skip.
///
/// \param StartOfLine Whether skipping these bytes puts the lexer at the
/// start of a line.
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
  // Stored as a (byte count, start-of-line flag) pair.
  SkipMainFilePreamble.first = Bytes;
  SkipMainFilePreamble.second = StartOfLine;
}

/// Forwarding function for diagnostics.  This emits a diagnostic at
/// the specified Token's location, translating the token's start
/// position in the current buffer into a SourcePosition object for rendering.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
  return Diags->Report(Loc, DiagID);
}

/// Overload of Diag() that reports at the location of \p Tok.
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
  return Diags->Report(Tok.getLocation(), DiagID);
}

/// Return the 'spelling' of the token at the given
/// location; does not go up to the spelling location or down to the
/// expansion location.
///
/// \param buffer A buffer which will be used only if the token requires
///   "cleaning", e.g. if it contains trigraphs or escaped newlines
/// \param invalid If non-null, will be set \c true if an error occurs.
StringRef getSpelling(SourceLocation loc,
                      SmallVectorImpl<char> &buffer,
                      bool *invalid = nullptr) const {
  return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
}

/// Return the 'spelling' of the Tok token.
///
/// The spelling of a token is the characters used to represent the token in
/// the source file after trigraph expansion and escaped-newline folding.  In
/// particular, this wants to get the true, uncanonicalized, spelling of
/// things like digraphs, UCNs, etc.
///
/// \param Invalid If non-null, will be set \c true if an error occurs.
std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
  return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
}

/// Get the spelling of a token into a preallocated buffer, instead
/// of as an std::string.
///
/// The caller is required to allocate enough space for the token, which is
/// guaranteed to be at least Tok.getLength() bytes long. The length of the
/// actual result is returned.
1925 /// 1926 /// Note that this method may do two possible things: it may either fill in 1927 /// the buffer specified with characters, or it may *change the input pointer* 1928 /// to point to a constant buffer with the data already in it (avoiding a 1929 /// copy). The caller is not allowed to modify the returned buffer pointer 1930 /// if an internal buffer is returned. 1931 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1932 bool *Invalid = nullptr) const { 1933 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1934 } 1935 1936 /// Get the spelling of a token into a SmallVector. 1937 /// 1938 /// Note that the returned StringRef may not point to the 1939 /// supplied buffer if a copy can be avoided. 1940 StringRef getSpelling(const Token &Tok, 1941 SmallVectorImpl<char> &Buffer, 1942 bool *Invalid = nullptr) const; 1943 1944 /// Relex the token at the specified location. 1945 /// \returns true if there was a failure, false on success. 1946 bool getRawToken(SourceLocation Loc, Token &Result, 1947 bool IgnoreWhiteSpace = false) { 1948 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1949 } 1950 1951 /// Given a Token \p Tok that is a numeric constant with length 1, 1952 /// return the character. 1953 char 1954 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1955 bool *Invalid = nullptr) const { 1956 assert(Tok.is(tok::numeric_constant) && 1957 Tok.getLength() == 1 && "Called on unsupported token"); 1958 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1959 1960 // If the token is carrying a literal data pointer, just use it. 1961 if (const char *D = Tok.getLiteralData()) 1962 return *D; 1963 1964 // Otherwise, fall back on getCharacterData, which is slower, but always 1965 // works. 1966 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1967 } 1968 1969 /// Retrieve the name of the immediate macro expansion. 
1970 /// 1971 /// This routine starts from a source location, and finds the name of the 1972 /// macro responsible for its immediate expansion. It looks through any 1973 /// intervening macro argument expansions to compute this. It returns a 1974 /// StringRef that refers to the SourceManager-owned buffer of the source 1975 /// where that macro name is spelled. Thus, the result shouldn't out-live 1976 /// the SourceManager. 1977 StringRef getImmediateMacroName(SourceLocation Loc) { 1978 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1979 } 1980 1981 /// Plop the specified string into a scratch buffer and set the 1982 /// specified token's location and length to it. 1983 /// 1984 /// If specified, the source location provides a location of the expansion 1985 /// point of the token. 1986 void CreateString(StringRef Str, Token &Tok, 1987 SourceLocation ExpansionLocStart = SourceLocation(), 1988 SourceLocation ExpansionLocEnd = SourceLocation()); 1989 1990 /// Split the first Length characters out of the token starting at TokLoc 1991 /// and return a location pointing to the split token. Re-lexing from the 1992 /// split token will return the split token rather than the original. 1993 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 1994 1995 /// Computes the source location just past the end of the 1996 /// token at this source location. 1997 /// 1998 /// This routine can be used to produce a source location that 1999 /// points just past the end of the token referenced by \p Loc, and 2000 /// is generally used when a diagnostic needs to point just after a 2001 /// token where it expected something different that it received. If 2002 /// the returned source location would not be meaningful (e.g., if 2003 /// it points into a macro), this routine returns an invalid 2004 /// source location. 2005 /// 2006 /// \param Offset an offset from the end of the token, where the source 2007 /// location should refer to. 
The default offset (0) produces a source 2008 /// location pointing just past the end of the token; an offset of 1 produces 2009 /// a source location pointing to the last character in the token, etc. 2010 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 2011 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 2012 } 2013 2014 /// Returns true if the given MacroID location points at the first 2015 /// token of the macro expansion. 2016 /// 2017 /// \param MacroBegin If non-null and function returns true, it is set to 2018 /// begin location of the macro. 2019 bool isAtStartOfMacroExpansion(SourceLocation loc, 2020 SourceLocation *MacroBegin = nullptr) const { 2021 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 2022 MacroBegin); 2023 } 2024 2025 /// Returns true if the given MacroID location points at the last 2026 /// token of the macro expansion. 2027 /// 2028 /// \param MacroEnd If non-null and function returns true, it is set to 2029 /// end location of the macro. 2030 bool isAtEndOfMacroExpansion(SourceLocation loc, 2031 SourceLocation *MacroEnd = nullptr) const { 2032 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 2033 } 2034 2035 /// Print the token to stderr, used for debugging. 2036 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 2037 void DumpLocation(SourceLocation Loc) const; 2038 void DumpMacro(const MacroInfo &MI) const; 2039 void dumpMacroInfo(const IdentifierInfo *II); 2040 2041 /// Given a location that specifies the start of a 2042 /// token, return a new location that specifies a character within the token. 2043 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 2044 unsigned Char) const { 2045 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 2046 } 2047 2048 /// Increment the counters for the number of token paste operations 2049 /// performed. 
2050 /// 2051 /// If fast was specified, this is a 'fast paste' case we handled. 2052 void IncrementPasteCounter(bool isFast) { 2053 if (isFast) 2054 ++NumFastTokenPaste; 2055 else 2056 ++NumTokenPaste; 2057 } 2058 2059 void PrintStats(); 2060 2061 size_t getTotalMemory() const; 2062 2063 /// When the macro expander pastes together a comment (/##/) in Microsoft 2064 /// mode, this method handles updating the current state, returning the 2065 /// token on the next source line. 2066 void HandleMicrosoftCommentPaste(Token &Tok); 2067 2068 //===--------------------------------------------------------------------===// 2069 // Preprocessor callback methods. These are invoked by a lexer as various 2070 // directives and events are found. 2071 2072 /// Given a tok::raw_identifier token, look up the 2073 /// identifier information for the token and install it into the token, 2074 /// updating the token kind accordingly. 2075 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2076 2077 private: 2078 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2079 2080 public: 2081 /// Specifies the reason for poisoning an identifier. 2082 /// 2083 /// If that identifier is accessed while poisoned, then this reason will be 2084 /// used instead of the default "poisoned" diagnostic. 2085 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2086 2087 /// Display reason for poisoned identifier. 2088 void HandlePoisonedIdentifier(Token & Identifier); 2089 2090 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2091 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2092 if(II->isPoisoned()) { 2093 HandlePoisonedIdentifier(Identifier); 2094 } 2095 } 2096 } 2097 2098 private: 2099 /// Identifiers used for SEH handling in Borland. 
These are only 2100 /// allowed in particular circumstances 2101 // __except block 2102 IdentifierInfo *Ident__exception_code, 2103 *Ident___exception_code, 2104 *Ident_GetExceptionCode; 2105 // __except filter expression 2106 IdentifierInfo *Ident__exception_info, 2107 *Ident___exception_info, 2108 *Ident_GetExceptionInfo; 2109 // __finally 2110 IdentifierInfo *Ident__abnormal_termination, 2111 *Ident___abnormal_termination, 2112 *Ident_AbnormalTermination; 2113 2114 const char *getCurLexerEndPos(); 2115 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 2116 2117 public: 2118 void PoisonSEHIdentifiers(bool Poison = true); // Borland 2119 2120 /// Callback invoked when the lexer reads an identifier and has 2121 /// filled in the tokens IdentifierInfo member. 2122 /// 2123 /// This callback potentially macro expands it or turns it into a named 2124 /// token (like 'for'). 2125 /// 2126 /// \returns true if we actually computed a token, false if we need to 2127 /// lex again. 2128 bool HandleIdentifier(Token &Identifier); 2129 2130 /// Callback invoked when the lexer hits the end of the current file. 2131 /// 2132 /// This either returns the EOF token and returns true, or 2133 /// pops a level off the include stack and returns false, at which point the 2134 /// client should call lex again. 2135 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 2136 2137 /// Callback invoked when the current TokenLexer hits the end of its 2138 /// token stream. 2139 bool HandleEndOfTokenLexer(Token &Result); 2140 2141 /// Callback invoked when the lexer sees a # token at the start of a 2142 /// line. 2143 /// 2144 /// This consumes the directive, modifies the lexer/preprocessor state, and 2145 /// advances the lexer(s) so that the next token read is the correct one. 2146 void HandleDirective(Token &Result); 2147 2148 /// Ensure that the next token is a tok::eod token. 2149 /// 2150 /// If not, emit a diagnostic and consume up until the eod. 
2151 /// If \p EnableMacros is true, then we consider macros that expand to zero 2152 /// tokens as being ok. 2153 /// 2154 /// \return The location of the end of the directive (the terminating 2155 /// newline). 2156 SourceLocation CheckEndOfDirective(const char *DirType, 2157 bool EnableMacros = false); 2158 2159 /// Read and discard all tokens remaining on the current line until 2160 /// the tok::eod token is found. Returns the range of the skipped tokens. 2161 SourceRange DiscardUntilEndOfDirective(); 2162 2163 /// Returns true if the preprocessor has seen a use of 2164 /// __DATE__ or __TIME__ in the file so far. 2165 bool SawDateOrTime() const { 2166 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2167 } 2168 unsigned getCounterValue() const { return CounterValue; } 2169 void setCounterValue(unsigned V) { CounterValue = V; } 2170 2171 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2172 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2173 "FPEvalMethod should be set either from command line or from the " 2174 "target info"); 2175 return CurrentFPEvalMethod; 2176 } 2177 2178 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2179 return TUFPEvalMethod; 2180 } 2181 2182 SourceLocation getLastFPEvalPragmaLocation() const { 2183 return LastFPEvalPragmaLocation; 2184 } 2185 2186 LangOptions::FPEvalMethodKind getLastFPEvalMethod() const { 2187 return LastFPEvalMethod; 2188 } 2189 2190 void setLastFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2191 LastFPEvalMethod = Val; 2192 } 2193 2194 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2195 LangOptions::FPEvalMethodKind Val) { 2196 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2197 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2198 // This is the location of the '#pragma float_control" where the 2199 // execution state is modifed. 
2200 LastFPEvalPragmaLocation = PragmaLoc; 2201 CurrentFPEvalMethod = Val; 2202 TUFPEvalMethod = Val; 2203 } 2204 2205 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2206 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2207 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2208 TUFPEvalMethod = Val; 2209 } 2210 2211 /// Retrieves the module that we're currently building, if any. 2212 Module *getCurrentModule(); 2213 2214 /// Allocate a new MacroInfo object with the provided SourceLocation. 2215 MacroInfo *AllocateMacroInfo(SourceLocation L); 2216 2217 /// Turn the specified lexer token into a fully checked and spelled 2218 /// filename, e.g. as an operand of \#include. 2219 /// 2220 /// The caller is expected to provide a buffer that is large enough to hold 2221 /// the spelling of the filename, but is also expected to handle the case 2222 /// when this method decides to use a different buffer. 2223 /// 2224 /// \returns true if the input filename was in <>'s or false if it was 2225 /// in ""'s. 2226 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2227 2228 /// Given a "foo" or \<foo> reference, look up the indicated file. 2229 /// 2230 /// Returns None on failure. \p isAngled indicates whether the file 2231 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2232 Optional<FileEntryRef> 2233 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2234 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2235 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2236 SmallVectorImpl<char> *RelativePath, 2237 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2238 bool *IsFrameworkFound, bool SkipCache = false); 2239 2240 /// Return true if we're in the top-level file, not in a \#include. 2241 bool isInPrimaryFile() const; 2242 2243 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2244 /// followed by EOD. 
Return true if the token is not a valid on-off-switch. 2245 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2246 2247 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2248 bool *ShadowFlag = nullptr); 2249 2250 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2251 Module *LeaveSubmodule(bool ForPragma); 2252 2253 private: 2254 friend void TokenLexer::ExpandFunctionArguments(); 2255 2256 void PushIncludeMacroStack() { 2257 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 2258 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 2259 std::move(CurLexer), CurPPLexer, 2260 std::move(CurTokenLexer), CurDirLookup); 2261 CurPPLexer = nullptr; 2262 } 2263 2264 void PopIncludeMacroStack() { 2265 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2266 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2267 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2268 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2269 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2270 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 2271 IncludeMacroStack.pop_back(); 2272 } 2273 2274 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2275 2276 /// Determine whether we need to create module macros for #defines in the 2277 /// current context. 2278 bool needModuleMacros() const; 2279 2280 /// Update the set of active module macros and ambiguity flag for a module 2281 /// macro name. 2282 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2283 2284 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2285 SourceLocation Loc); 2286 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2287 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2288 bool isPublic); 2289 2290 /// Lex and validate a macro name, which occurs after a 2291 /// \#define or \#undef. 
///
/// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag Points to flag that is set if macro name shadows
///                   a keyword.
///
/// This emits a diagnostic, sets the token kind to eod,
/// and discards the rest of the macro line if the macro name is invalid.
void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                   bool *ShadowFlag = nullptr);

/// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
/// entire line) of the macro's tokens and adds them to MacroInfo, and while
/// doing so performs certain validity checks including (but not limited to):
///   - # (stringization) is followed by a macro parameter
/// \param MacroNameTok - Token that represents the macro name
/// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
///
/// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
/// returns a nullptr if an invalid sequence of tokens is encountered.
MacroInfo *ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);

/// The ( starting an argument list of a macro definition has just been read.
/// Lex the rest of the parameters and the closing ), updating \p MI with
/// what we learn and saving in \p LastTok the last token read.
/// Return true if an error occurs parsing the arg list.
bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);

/// Provide a suggestion for a typoed directive. If there is no typo, then
/// just skip suggesting.
///
/// \param Tok - Token that represents the directive
/// \param Directive - String reference for the directive name
void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;

/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.  If \p
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered. If
/// \p FoundElse is false, then \#else directives are ok, if not, then we have
/// already seen one so a \#else directive is a duplicate.  When this returns,
/// the caller can lex the first valid token.
void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                  SourceLocation IfTokenLoc,
                                  bool FoundNonSkipPortion, bool FoundElse,
                                  SourceLocation ElseLoc = SourceLocation());

/// Information about the result for evaluating an expression for a
/// preprocessor directive.
///
/// The bool members carry no default initializers here.
struct DirectiveEvalResult {
  /// Whether the expression was evaluated as true or not.
  bool Conditional;

  /// True if the expression contained identifiers that were undefined.
  bool IncludedUndefinedIds;

  /// The source range for the expression.
  SourceRange ExprRange;
};

/// Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);

/// Process a '__has_include("path")' expression.
///
/// Returns true if successful.
bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);

/// Process '__has_include_next("path")' expression.
///
/// Returns true if successful.
bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);

/// Get the directory and file from which to start \#include_next lookup.
std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token &IncludeNextTok) const;

/// Install the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();

/// Register builtin macros such as __LINE__ with the identifier table.
void RegisterBuiltinMacros();

/// If an identifier token is read that is to be expanded as a macro, handle
/// it and return the next token as 'Tok'.  If we lexed a token, return true;
/// otherwise the caller should lex again.
bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);

/// Cache macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it
/// is going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                ArrayRef<Token> tokens);

void removeCachedMacroExpandedTokensOfLastLexer();

/// Determine whether the next preprocessor token to be
/// lexed is a '('.  If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();

/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation.  Returns null on error.
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                     SourceLocation &MacroEnd);

/// If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
2408 void ExpandBuiltinMacro(Token &Tok); 2409 2410 /// Read a \c _Pragma directive, slice it up, process it, then 2411 /// return the first token after the directive. 2412 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2413 void Handle_Pragma(Token &Tok); 2414 2415 /// Like Handle_Pragma except the pragma text is not enclosed within 2416 /// a string literal. 2417 void HandleMicrosoft__pragma(Token &Tok); 2418 2419 /// Add a lexer to the top of the include stack and 2420 /// start lexing tokens from it instead of the current buffer. 2421 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2422 2423 /// Set the FileID for the preprocessor predefines. 2424 void setPredefinesFileID(FileID FID) { 2425 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2426 PredefinesFileID = FID; 2427 } 2428 2429 /// Set the FileID for the PCH through header. 2430 void setPCHThroughHeaderFileID(FileID FID); 2431 2432 /// Returns true if we are lexing from a file and not a 2433 /// pragma or a macro. 2434 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2435 return L ? !L->isPragmaLexer() : P != nullptr; 2436 } 2437 2438 static bool IsFileLexer(const IncludeStackInfo& I) { 2439 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2440 } 2441 2442 bool IsFileLexer() const { 2443 return IsFileLexer(CurLexer.get(), CurPPLexer); 2444 } 2445 2446 //===--------------------------------------------------------------------===// 2447 // Caching stuff. 2448 void CachingLex(Token &Result); 2449 2450 bool InCachingLexMode() const { 2451 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2452 // that we are past EOF, not that we are in CachingLex mode. 
2453 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2454 } 2455 2456 void EnterCachingLexMode(); 2457 void EnterCachingLexModeUnchecked(); 2458 2459 void ExitCachingLexMode() { 2460 if (InCachingLexMode()) 2461 RemoveTopOfLexerStack(); 2462 } 2463 2464 const Token &PeekAhead(unsigned N); 2465 void AnnotatePreviousCachedTokens(const Token &Tok); 2466 2467 //===--------------------------------------------------------------------===// 2468 /// Handle*Directive - implement the various preprocessor directives. These 2469 /// should side-effect the current preprocessor object so that the next call 2470 /// to Lex() will return the appropriate token next. 2471 void HandleLineDirective(); 2472 void HandleDigitDirective(Token &Tok); 2473 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2474 void HandleIdentSCCSDirective(Token &Tok); 2475 void HandleMacroPublicDirective(Token &Tok); 2476 void HandleMacroPrivateDirective(); 2477 2478 /// An additional notification that can be produced by a header inclusion or 2479 /// import to tell the parser what happened. 
2480 struct ImportAction { 2481 enum ActionKind { 2482 None, 2483 ModuleBegin, 2484 ModuleImport, 2485 HeaderUnitImport, 2486 SkippedModuleImport, 2487 Failure, 2488 } Kind; 2489 Module *ModuleForHeader = nullptr; 2490 2491 ImportAction(ActionKind AK, Module *Mod = nullptr) 2492 : Kind(AK), ModuleForHeader(Mod) { 2493 assert((AK == None || Mod || AK == Failure) && 2494 "no module for module action"); 2495 } 2496 }; 2497 2498 Optional<FileEntryRef> LookupHeaderIncludeOrImport( 2499 ConstSearchDirIterator *CurDir, StringRef &Filename, 2500 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2501 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2502 bool &IsMapped, ConstSearchDirIterator LookupFrom, 2503 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2504 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2505 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2506 2507 // File inclusion. 2508 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2509 ConstSearchDirIterator LookupFrom = nullptr, 2510 const FileEntry *LookupFromFile = nullptr); 2511 ImportAction 2512 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2513 Token &FilenameTok, SourceLocation EndLoc, 2514 ConstSearchDirIterator LookupFrom = nullptr, 2515 const FileEntry *LookupFromFile = nullptr); 2516 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2517 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2518 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2519 void HandleMicrosoftImportDirective(Token &Tok); 2520 2521 public: 2522 /// Check that the given module is available, producing a diagnostic if not. 2523 /// \return \c true if the check failed (because the module is not available). 2524 /// \c false if the module appears to be usable. 
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                     const TargetInfo &TargetInfo,
                                     DiagnosticsEngine &Diags, Module *M);

  // Module inclusion testing.
  /// Find the module that owns the source or header file that
  /// \p Loc points to. If the location is in a file that was included
  /// into a module, or is outside any module, returns nullptr.
  Module *getModuleForLocation(SourceLocation Loc);

  /// We want to produce a diagnostic at location IncLoc concerning an
  /// unreachable effect at location MLoc (e.g., where a desired entity was
  /// declared or defined). Determine whether the right way to make MLoc
  /// reachable is by #include, and if so, what header should be included.
  ///
  /// This is not necessarily fast, and might load unexpected module maps, so
  /// should only be called by code that intends to produce an error.
  ///
  /// \param IncLoc The location at which the missing effect was detected.
  /// \param MLoc A location within an unimported module at which the desired
  ///        effect occurred.
  /// \return A file that can be #included to provide the desired effect. Null
  ///         if no such file could be determined or if a #include is not
  ///         appropriate (e.g., if a module should be imported instead).
const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                    SourceLocation MLoc);

  /// Whether the preamble conditional stack is currently recording; forwards
  /// to PreambleConditionalStack.isRecording().
  bool isRecordingPreamble() const {
    return PreambleConditionalStack.isRecording();
  }

  /// Whether a preamble has been recorded; forwards to
  /// PreambleConditionalStack.hasRecordedPreamble().
  bool hasRecordedPreamble() const {
    return PreambleConditionalStack.hasRecordedPreamble();
  }

  /// Returns the conditional stack held by PreambleConditionalStack.
  ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
    return PreambleConditionalStack.getStack();
  }

  /// Hands \p s to PreambleConditionalStack.setStack().
  void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
    PreambleConditionalStack.setStack(s);
  }

  /// Prepare the preamble conditional stack for replay.
  ///
  /// Calls startReplaying() first, then installs \p s via setStack() and
  /// finally stores \p SkipInfo. The calls are issued in exactly this order.
  // NOTE(review): setStack() may depend on the replaying state set by
  // startReplaying() -- do not reorder without checking its definition.
  void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
                                             llvm::Optional<PreambleSkipInfo> SkipInfo) {
    PreambleConditionalStack.startReplaying();
    PreambleConditionalStack.setStack(s);
    PreambleConditionalStack.SkipInfo = SkipInfo;
  }

  /// Returns a copy of the skip info stored on PreambleConditionalStack.
  llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
    return PreambleConditionalStack.SkipInfo;
  }

private:
  /// After processing the predefined file, initialize the conditional stack
  /// from the preamble.
  void replayPreambleConditionalStack();

  // Macro handling.
  void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  void HandleUndefDirective();

  // Conditional Inclusion.
  void HandleIfdefDirective(Token &Result, const Token &HashToken,
                            bool isIfndef, bool ReadAnyTokensBeforeDirective);
  void HandleIfDirective(Token &IfToken, const Token &HashToken,
                         bool ReadAnyTokensBeforeDirective);
  void HandleEndifDirective(Token &EndifToken);
  void HandleElseDirective(Token &Result, const Token &HashToken);
  void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                                 tok::PPKeywordKind Kind);

  // Pragmas.
2599 void HandlePragmaDirective(PragmaIntroducer Introducer); 2600 2601 public: 2602 void HandlePragmaOnce(Token &OnceTok); 2603 void HandlePragmaMark(Token &MarkTok); 2604 void HandlePragmaPoison(); 2605 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2606 void HandlePragmaDependency(Token &DependencyTok); 2607 void HandlePragmaPushMacro(Token &Tok); 2608 void HandlePragmaPopMacro(Token &Tok); 2609 void HandlePragmaIncludeAlias(Token &Tok); 2610 void HandlePragmaModuleBuild(Token &Tok); 2611 void HandlePragmaHdrstop(Token &Tok); 2612 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2613 2614 // Return true and store the first token only if any CommentHandler 2615 // has inserted some tokens and getCommentRetentionState() is false. 2616 bool HandleComment(Token &result, SourceRange Comment); 2617 2618 /// A macro is used, update information about macros that need unused 2619 /// warnings. 2620 void markMacroAsUsed(MacroInfo *MI); 2621 2622 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2623 SourceLocation AnnotationLoc) { 2624 auto Annotations = AnnotationInfos.find(II); 2625 if (Annotations == AnnotationInfos.end()) 2626 AnnotationInfos.insert(std::make_pair( 2627 II, 2628 MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg)))); 2629 else 2630 Annotations->second.DeprecationInfo = 2631 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2632 } 2633 2634 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2635 SourceLocation AnnotationLoc) { 2636 auto Annotations = AnnotationInfos.find(II); 2637 if (Annotations == AnnotationInfos.end()) 2638 AnnotationInfos.insert( 2639 std::make_pair(II, MacroAnnotations::makeRestrictExpansion( 2640 AnnotationLoc, std::move(Msg)))); 2641 else 2642 Annotations->second.RestrictExpansionInfo = 2643 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2644 } 2645 2646 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2647 auto Annotations = 
AnnotationInfos.find(II); 2648 if (Annotations == AnnotationInfos.end()) 2649 AnnotationInfos.insert( 2650 std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc))); 2651 else 2652 Annotations->second.FinalAnnotationLoc = AnnotationLoc; 2653 } 2654 2655 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2656 return AnnotationInfos.find(II)->second; 2657 } 2658 2659 void emitMacroExpansionWarnings(const Token &Identifier) const { 2660 if (Identifier.getIdentifierInfo()->isDeprecatedMacro()) 2661 emitMacroDeprecationWarning(Identifier); 2662 2663 if (Identifier.getIdentifierInfo()->isRestrictExpansion() && 2664 !SourceMgr.isInMainFile(Identifier.getLocation())) 2665 emitRestrictExpansionWarning(Identifier); 2666 } 2667 2668 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2669 const LangOptions &LangOpts, 2670 const TargetInfo &TI); 2671 2672 private: 2673 void emitMacroDeprecationWarning(const Token &Identifier) const; 2674 void emitRestrictExpansionWarning(const Token &Identifier) const; 2675 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 2676 }; 2677 2678 /// Abstract base class that describes a handler that will receive 2679 /// source ranges for each of the comments encountered in the source file. 2680 class CommentHandler { 2681 public: 2682 virtual ~CommentHandler(); 2683 2684 // The handler shall return true if it has pushed any tokens 2685 // to be read using e.g. EnterToken or EnterTokenStream. 2686 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2687 }; 2688 2689 /// Abstract base class that describes a handler that will receive 2690 /// source ranges for empty lines encountered in the source file. 2691 class EmptylineHandler { 2692 public: 2693 virtual ~EmptylineHandler(); 2694 2695 // The handler handles empty lines. 
2696 virtual void HandleEmptyline(SourceRange Range) = 0; 2697 }; 2698 2699 /// Registry of pragma handlers added by plugins 2700 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 2701 2702 } // namespace clang 2703 2704 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 2705