1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/DiagnosticIDs.h" 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/Module.h" 23 #include "clang/Basic/SourceLocation.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Basic/TokenKinds.h" 26 #include "clang/Lex/HeaderSearch.h" 27 #include "clang/Lex/Lexer.h" 28 #include "clang/Lex/MacroInfo.h" 29 #include "clang/Lex/ModuleLoader.h" 30 #include "clang/Lex/ModuleMap.h" 31 #include "clang/Lex/PPCallbacks.h" 32 #include "clang/Lex/Token.h" 33 #include "clang/Lex/TokenLexer.h" 34 #include "llvm/ADT/ArrayRef.h" 35 #include "llvm/ADT/DenseMap.h" 36 #include "llvm/ADT/FoldingSet.h" 37 #include "llvm/ADT/FunctionExtras.h" 38 #include "llvm/ADT/None.h" 39 #include "llvm/ADT/Optional.h" 40 #include "llvm/ADT/PointerUnion.h" 41 #include "llvm/ADT/STLExtras.h" 42 #include "llvm/ADT/SmallPtrSet.h" 43 #include "llvm/ADT/SmallVector.h" 44 #include "llvm/ADT/StringRef.h" 45 #include "llvm/ADT/TinyPtrVector.h" 46 #include "llvm/ADT/iterator_range.h" 47 #include "llvm/Support/Allocator.h" 48 #include "llvm/Support/Casting.h" 49 #include "llvm/Support/Registry.h" 50 #include <cassert> 51 #include <cstddef> 52 #include <cstdint> 53 #include <map> 54 #include <memory> 55 #include <string> 56 #include 
<utility> 57 #include <vector> 58 59 namespace llvm { 60 61 template<unsigned InternalLen> class SmallString; 62 63 } // namespace llvm 64 65 namespace clang { 66 67 class CodeCompletionHandler; 68 class CommentHandler; 69 class DirectoryEntry; 70 class DirectoryLookup; 71 class EmptylineHandler; 72 class ExternalPreprocessorSource; 73 class FileEntry; 74 class FileManager; 75 class HeaderSearch; 76 class MacroArgs; 77 class PragmaHandler; 78 class PragmaNamespace; 79 class PreprocessingRecord; 80 class PreprocessorLexer; 81 class PreprocessorOptions; 82 class ScratchBuffer; 83 class TargetInfo; 84 85 namespace Builtin { 86 class Context; 87 } 88 89 /// Stores token information for comparing actual tokens with 90 /// predefined values. Only handles simple tokens and identifiers. 91 class TokenValue { 92 tok::TokenKind Kind; 93 IdentifierInfo *II; 94 95 public: 96 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 97 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 98 assert(Kind != tok::identifier && 99 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 100 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 101 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 102 } 103 104 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 105 106 bool operator==(const Token &Tok) const { 107 return Tok.getKind() == Kind && 108 (!II || II == Tok.getIdentifierInfo()); 109 } 110 }; 111 112 /// Context in which macro name is used. 113 enum MacroUse { 114 // other than #define or #undef 115 MU_Other = 0, 116 117 // macro name specified in #define 118 MU_Define = 1, 119 120 // macro name specified in #undef 121 MU_Undef = 2 122 }; 123 124 /// Engages in a tight little dance with the lexer to efficiently 125 /// preprocess tokens. 
126 /// 127 /// Lexers know only about tokens within a single source file, and don't 128 /// know anything about preprocessor-level issues like the \#include stack, 129 /// token expansion, etc. 130 class Preprocessor { 131 friend class VAOptDefinitionContext; 132 friend class VariadicMacroScopeGuard; 133 134 llvm::unique_function<void(const clang::Token &)> OnToken; 135 std::shared_ptr<PreprocessorOptions> PPOpts; 136 DiagnosticsEngine *Diags; 137 LangOptions &LangOpts; 138 const TargetInfo *Target = nullptr; 139 const TargetInfo *AuxTarget = nullptr; 140 FileManager &FileMgr; 141 SourceManager &SourceMgr; 142 std::unique_ptr<ScratchBuffer> ScratchBuf; 143 HeaderSearch &HeaderInfo; 144 ModuleLoader &TheModuleLoader; 145 146 /// External source of macros. 147 ExternalPreprocessorSource *ExternalSource; 148 149 /// A BumpPtrAllocator object used to quickly allocate and release 150 /// objects internal to the Preprocessor. 151 llvm::BumpPtrAllocator BP; 152 153 /// Identifiers for builtin macros and other builtins. 
154 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 155 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 156 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 157 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 158 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 159 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 160 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 161 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 162 IdentifierInfo *Ident__identifier; // __identifier 163 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 164 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 165 IdentifierInfo *Ident__has_feature; // __has_feature 166 IdentifierInfo *Ident__has_extension; // __has_extension 167 IdentifierInfo *Ident__has_builtin; // __has_builtin 168 IdentifierInfo *Ident__has_attribute; // __has_attribute 169 IdentifierInfo *Ident__has_include; // __has_include 170 IdentifierInfo *Ident__has_include_next; // __has_include_next 171 IdentifierInfo *Ident__has_warning; // __has_warning 172 IdentifierInfo *Ident__is_identifier; // __is_identifier 173 IdentifierInfo *Ident__building_module; // __building_module 174 IdentifierInfo *Ident__MODULE__; // __MODULE__ 175 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 176 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 177 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 178 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 179 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 180 IdentifierInfo *Ident__is_target_os; // __is_target_os 181 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 182 IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD 183 184 // Weak, only valid (and set) while InMacroArgs is true. 
185 Token* ArgMacro; 186 187 SourceLocation DATELoc, TIMELoc; 188 189 // FEM_UnsetOnCommandLine means that an explicit evaluation method was 190 // not specified on the command line. The target is queried to set the 191 // default evaluation method. 192 LangOptions::FPEvalMethodKind CurrentFPEvalMethod = 193 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 194 195 // Keeps the value of the last evaluation method before a 196 // 'pragma float_control (precise,off)' is applied. 197 LangOptions::FPEvalMethodKind LastFPEvalMethod = 198 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 199 200 // The most recent pragma location where the floating point evaluation 201 // method was modified. This is used to determine whether the 202 // 'pragma clang fp eval_method' was used within the current scope. 203 SourceLocation LastFPEvalPragmaLocation; 204 205 LangOptions::FPEvalMethodKind TUFPEvalMethod = 206 LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine; 207 208 // Next __COUNTER__ value, starts at 0. 209 unsigned CounterValue = 0; 210 211 enum { 212 /// Maximum depth of \#includes. 213 MaxAllowedIncludeStackDepth = 200 214 }; 215 216 // State that is set before the preprocessor begins. 217 bool KeepComments : 1; 218 bool KeepMacroComments : 1; 219 bool SuppressIncludeNotFoundError : 1; 220 221 // State that changes while the preprocessor runs: 222 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 223 224 /// Whether the preprocessor owns the header search object. 225 bool OwnsHeaderSearch : 1; 226 227 /// True if macro expansion is disabled. 228 bool DisableMacroExpansion : 1; 229 230 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 231 /// when parsing preprocessor directives. 232 bool MacroExpansionInDirectivesOverride : 1; 233 234 class ResetMacroExpansionHelper; 235 236 /// Whether we have already loaded macros from the external source.
237 mutable bool ReadMacrosFromExternalSource : 1; 238 239 /// True if pragmas are enabled. 240 bool PragmasEnabled : 1; 241 242 /// True if the current build action is a preprocessing action. 243 bool PreprocessedOutput : 1; 244 245 /// True if we are currently preprocessing a #if or #elif directive 246 bool ParsingIfOrElifDirective; 247 248 /// True if we are pre-expanding macro arguments. 249 bool InMacroArgPreExpansion; 250 251 /// Mapping/lookup information for all identifiers in 252 /// the program, including program keywords. 253 mutable IdentifierTable Identifiers; 254 255 /// This table contains all the selectors in the program. 256 /// 257 /// Unlike IdentifierTable above, this table *isn't* populated by the 258 /// preprocessor. It is declared/expanded here because its role/lifetime is 259 /// conceptually similar to the IdentifierTable. In addition, the current 260 /// control flow (in clang::ParseAST()), make it convenient to put here. 261 /// 262 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 263 /// the lifetime of the preprocessor. 264 SelectorTable Selectors; 265 266 /// Information about builtins. 267 std::unique_ptr<Builtin::Context> BuiltinInfo; 268 269 /// Tracks all of the pragmas that the client registered 270 /// with this preprocessor. 271 std::unique_ptr<PragmaNamespace> PragmaHandlers; 272 273 /// Pragma handlers of the original source is stored here during the 274 /// parsing of a model file. 275 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 276 277 /// Tracks all of the comment handlers that the client registered 278 /// with this preprocessor. 279 std::vector<CommentHandler *> CommentHandlers; 280 281 /// Empty line handler. 282 EmptylineHandler *Emptyline = nullptr; 283 284 /// True if we want to ignore EOF token and continue later on (thus 285 /// avoid tearing the Lexer and etc. down). 286 bool IncrementalProcessing = false; 287 288 public: 289 /// The kind of translation unit we are processing. 
290 const TranslationUnitKind TUKind; 291 292 private: 293 /// The code-completion handler. 294 CodeCompletionHandler *CodeComplete = nullptr; 295 296 /// The file that we're performing code-completion for, if any. 297 const FileEntry *CodeCompletionFile = nullptr; 298 299 /// The offset in file for the code-completion point. 300 unsigned CodeCompletionOffset = 0; 301 302 /// The location for the code-completion point. This gets instantiated 303 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 304 SourceLocation CodeCompletionLoc; 305 306 /// The start location for the file of the code-completion point. 307 /// 308 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 309 /// for preprocessing. 310 SourceLocation CodeCompletionFileLoc; 311 312 /// The source location of the \c import contextual keyword we just 313 /// lexed, if any. 314 SourceLocation ModuleImportLoc; 315 316 /// The module import path that we're currently processing. 317 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 318 319 /// Whether the last token we lexed was an '@'. 320 bool LastTokenWasAt = false; 321 322 /// A position within a C++20 import-seq. 323 class ImportSeq { 324 public: 325 enum State : int { 326 // Positive values represent a number of unclosed brackets. 327 AtTopLevel = 0, 328 AfterTopLevelTokenSeq = -1, 329 AfterExport = -2, 330 AfterImportSeq = -3, 331 }; 332 333 ImportSeq(State S) : S(S) {} 334 335 /// Saw any kind of open bracket. 336 void handleOpenBracket() { 337 S = static_cast<State>(std::max<int>(S, 0) + 1); 338 } 339 /// Saw any kind of close bracket other than '}'. 340 void handleCloseBracket() { 341 S = static_cast<State>(std::max<int>(S, 1) - 1); 342 } 343 /// Saw a close brace. 344 void handleCloseBrace() { 345 handleCloseBracket(); 346 if (S == AtTopLevel && !AfterHeaderName) 347 S = AfterTopLevelTokenSeq; 348 } 349 /// Saw a semicolon. 
350 void handleSemi() { 351 if (atTopLevel()) { 352 S = AfterTopLevelTokenSeq; 353 AfterHeaderName = false; 354 } 355 } 356 357 /// Saw an 'export' identifier. 358 void handleExport() { 359 if (S == AfterTopLevelTokenSeq) 360 S = AfterExport; 361 else if (S <= 0) 362 S = AtTopLevel; 363 } 364 /// Saw an 'import' identifier. 365 void handleImport() { 366 if (S == AfterTopLevelTokenSeq || S == AfterExport) 367 S = AfterImportSeq; 368 else if (S <= 0) 369 S = AtTopLevel; 370 } 371 372 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 373 /// until we reach a top-level semicolon. 374 void handleHeaderName() { 375 if (S == AfterImportSeq) 376 AfterHeaderName = true; 377 handleMisc(); 378 } 379 380 /// Saw any other token. 381 void handleMisc() { 382 if (S <= 0) 383 S = AtTopLevel; 384 } 385 386 bool atTopLevel() { return S <= 0; } 387 bool afterImportSeq() { return S == AfterImportSeq; } 388 bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; } 389 390 private: 391 State S; 392 /// Whether we're in the pp-import-suffix following the header-name in a 393 /// pp-import. If so, a close-brace is not sufficient to end the 394 /// top-level-token-seq of an import-seq. 395 bool AfterHeaderName = false; 396 }; 397 398 /// Our current position within a C++20 import-seq. 399 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq; 400 401 /// Track whether we are in a Global Module Fragment 402 class TrackGMF { 403 public: 404 enum GMFState : int { 405 GMFActive = 1, 406 MaybeGMF = 0, 407 BeforeGMFIntroducer = -1, 408 GMFAbsentOrEnded = -2, 409 }; 410 411 TrackGMF(GMFState S) : S(S) {} 412 413 /// Saw a semicolon. 414 void handleSemi() { 415 // If it is immediately after the first instance of the module keyword, 416 // then that introduces the GMF. 417 if (S == MaybeGMF) 418 S = GMFActive; 419 } 420 421 /// Saw an 'export' identifier. 422 void handleExport() { 423 // The presence of an 'export' keyword always ends or excludes a GMF. 
424 S = GMFAbsentOrEnded; 425 } 426 427 /// Saw an 'import' identifier. 428 void handleImport(bool AfterTopLevelTokenSeq) { 429 // If we see this before any 'module' kw, then we have no GMF. 430 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 431 S = GMFAbsentOrEnded; 432 } 433 434 /// Saw a 'module' identifier. 435 void handleModule(bool AfterTopLevelTokenSeq) { 436 // This was the first module identifier and not preceded by any token 437 // that would exclude a GMF. It could begin a GMF, but only if directly 438 // followed by a semicolon. 439 if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer) 440 S = MaybeGMF; 441 else 442 S = GMFAbsentOrEnded; 443 } 444 445 /// Saw any other token. 446 void handleMisc() { 447 // We saw something other than ; after the 'module' kw, so not a GMF. 448 if (S == MaybeGMF) 449 S = GMFAbsentOrEnded; 450 } 451 452 bool inGMF() { return S == GMFActive; } 453 454 private: 455 /// Track the transitions into and out of a Global Module Fragment, 456 /// if one is present. 457 GMFState S; 458 }; 459 460 TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer; 461 462 /// Whether the module import expects an identifier next. Otherwise, 463 /// it expects a '.' or ';'. 464 bool ModuleImportExpectsIdentifier = false; 465 466 /// The identifier and source location of the currently-active 467 /// \#pragma clang arc_cf_code_audited begin. 468 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 469 470 /// The source location of the currently-active 471 /// \#pragma clang assume_nonnull begin. 472 SourceLocation PragmaAssumeNonNullLoc; 473 474 /// Set only for preambles which end with an active 475 /// \#pragma clang assume_nonnull begin. 476 /// 477 /// When the preamble is loaded into the main file, 478 /// `PragmaAssumeNonNullLoc` will be set to this to 479 /// replay the unterminated assume_nonnull. 480 SourceLocation PreambleRecordedPragmaAssumeNonNullLoc; 481 482 /// True if we hit the code-completion point. 
483 bool CodeCompletionReached = false; 484 485 /// The code completion token containing the information 486 /// on the stem that is to be code completed. 487 IdentifierInfo *CodeCompletionII = nullptr; 488 489 /// Range for the code completion token. 490 SourceRange CodeCompletionTokenRange; 491 492 /// The directory that the main file should be considered to occupy, 493 /// if it does not correspond to a real file (as happens when building a 494 /// module). 495 const DirectoryEntry *MainFileDir = nullptr; 496 497 /// The number of bytes that we will initially skip when entering the 498 /// main file, along with a flag that indicates whether skipping this number 499 /// of bytes will place the lexer at the start of a line. 500 /// 501 /// This is used when loading a precompiled preamble. 502 std::pair<int, bool> SkipMainFilePreamble; 503 504 /// Whether we hit an error due to reaching max allowed include depth. Allows 505 /// to avoid hitting the same error over and over again. 506 bool HasReachedMaxIncludeDepth = false; 507 508 /// The number of currently-active calls to Lex. 509 /// 510 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 511 /// require asking for multiple additional tokens. This counter makes it 512 /// possible for Lex to detect whether it's producing a token for the end 513 /// of phase 4 of translation or for some other situation. 514 unsigned LexLevel = 0; 515 516 /// The number of (LexLevel 0) preprocessor tokens. 517 unsigned TokenCount = 0; 518 519 /// Preprocess every token regardless of LexLevel. 520 bool PreprocessToken = false; 521 522 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 523 /// warning, or zero for unlimited. 
524 unsigned MaxTokens = 0; 525 SourceLocation MaxTokensOverrideLoc; 526 527 public: 528 struct PreambleSkipInfo { 529 SourceLocation HashTokenLoc; 530 SourceLocation IfTokenLoc; 531 bool FoundNonSkipPortion; 532 bool FoundElse; 533 SourceLocation ElseLoc; 534 535 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 536 bool FoundNonSkipPortion, bool FoundElse, 537 SourceLocation ElseLoc) 538 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 539 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 540 ElseLoc(ElseLoc) {} 541 }; 542 543 using IncludedFilesSet = llvm::DenseSet<const FileEntry *>; 544 545 private: 546 friend class ASTReader; 547 friend class MacroArgs; 548 549 class PreambleConditionalStackStore { 550 enum State { 551 Off = 0, 552 Recording = 1, 553 Replaying = 2, 554 }; 555 556 public: 557 PreambleConditionalStackStore() = default; 558 559 void startRecording() { ConditionalStackState = Recording; } 560 void startReplaying() { ConditionalStackState = Replaying; } 561 bool isRecording() const { return ConditionalStackState == Recording; } 562 bool isReplaying() const { return ConditionalStackState == Replaying; } 563 564 ArrayRef<PPConditionalInfo> getStack() const { 565 return ConditionalStack; 566 } 567 568 void doneReplaying() { 569 ConditionalStack.clear(); 570 ConditionalStackState = Off; 571 } 572 573 void setStack(ArrayRef<PPConditionalInfo> s) { 574 if (!isRecording() && !isReplaying()) 575 return; 576 ConditionalStack.clear(); 577 ConditionalStack.append(s.begin(), s.end()); 578 } 579 580 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 581 582 bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); } 583 584 void clearSkipInfo() { SkipInfo.reset(); } 585 586 llvm::Optional<PreambleSkipInfo> SkipInfo; 587 588 private: 589 SmallVector<PPConditionalInfo, 4> ConditionalStack; 590 State ConditionalStackState = Off; 591 } PreambleConditionalStack; 592 593 /// The current top of 
the stack that we're lexing from if 594 /// not expanding a macro and we are lexing directly from source code. 595 /// 596 /// Only one of CurLexer, or CurTokenLexer will be non-null. 597 std::unique_ptr<Lexer> CurLexer; 598 599 /// The current top of the stack what we're lexing from 600 /// if not expanding a macro. 601 /// 602 /// This is an alias for CurLexer. 603 PreprocessorLexer *CurPPLexer = nullptr; 604 605 /// Used to find the current FileEntry, if CurLexer is non-null 606 /// and if applicable. 607 /// 608 /// This allows us to implement \#include_next and find directory-specific 609 /// properties. 610 ConstSearchDirIterator CurDirLookup = nullptr; 611 612 /// The current macro we are expanding, if we are expanding a macro. 613 /// 614 /// One of CurLexer and CurTokenLexer must be null. 615 std::unique_ptr<TokenLexer> CurTokenLexer; 616 617 /// The kind of lexer we're currently working with. 618 enum CurLexerKind { 619 CLK_Lexer, 620 CLK_TokenLexer, 621 CLK_CachingLexer, 622 CLK_DependencyDirectivesLexer, 623 CLK_LexAfterModuleImport 624 } CurLexerKind = CLK_Lexer; 625 626 /// If the current lexer is for a submodule that is being built, this 627 /// is that submodule. 628 Module *CurLexerSubmodule = nullptr; 629 630 /// Keeps track of the stack of files currently 631 /// \#included, and macros currently being expanded from, not counting 632 /// CurLexer/CurTokenLexer. 633 struct IncludeStackInfo { 634 enum CurLexerKind CurLexerKind; 635 Module *TheSubmodule; 636 std::unique_ptr<Lexer> TheLexer; 637 PreprocessorLexer *ThePPLexer; 638 std::unique_ptr<TokenLexer> TheTokenLexer; 639 ConstSearchDirIterator TheDirLookup; 640 641 // The following constructors are completely useless copies of the default 642 // versions, only needed to pacify MSVC. 
643 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 644 std::unique_ptr<Lexer> &&TheLexer, 645 PreprocessorLexer *ThePPLexer, 646 std::unique_ptr<TokenLexer> &&TheTokenLexer, 647 ConstSearchDirIterator TheDirLookup) 648 : CurLexerKind(std::move(CurLexerKind)), 649 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 650 ThePPLexer(std::move(ThePPLexer)), 651 TheTokenLexer(std::move(TheTokenLexer)), 652 TheDirLookup(std::move(TheDirLookup)) {} 653 }; 654 std::vector<IncludeStackInfo> IncludeMacroStack; 655 656 /// Actions invoked when some preprocessor activity is 657 /// encountered (e.g. a file is \#included, etc). 658 std::unique_ptr<PPCallbacks> Callbacks; 659 660 struct MacroExpandsInfo { 661 Token Tok; 662 MacroDefinition MD; 663 SourceRange Range; 664 665 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 666 : Tok(Tok), MD(MD), Range(Range) {} 667 }; 668 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 669 670 /// Information about a name that has been used to define a module macro. 671 struct ModuleMacroInfo { 672 /// The most recent macro directive for this identifier. 673 MacroDirective *MD; 674 675 /// The active module macros for this identifier. 676 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 677 678 /// The generation number at which we last updated ActiveModuleMacros. 679 /// \see Preprocessor::VisibleModules. 680 unsigned ActiveModuleMacrosGeneration = 0; 681 682 /// Whether this macro name is ambiguous. 683 bool IsAmbiguous = false; 684 685 /// The module macros that are overridden by this macro. 686 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 687 688 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 689 }; 690 691 /// The state of a macro for an identifier. 
692 class MacroState { 693 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 694 695 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 696 const IdentifierInfo *II) const { 697 if (II->isOutOfDate()) 698 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 699 // FIXME: Find a spare bit on IdentifierInfo and store a 700 // HasModuleMacros flag. 701 if (!II->hasMacroDefinition() || 702 (!PP.getLangOpts().Modules && 703 !PP.getLangOpts().ModulesLocalVisibility) || 704 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 705 return nullptr; 706 707 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 708 if (!Info) { 709 Info = new (PP.getPreprocessorAllocator()) 710 ModuleMacroInfo(State.get<MacroDirective *>()); 711 State = Info; 712 } 713 714 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 715 Info->ActiveModuleMacrosGeneration) 716 PP.updateModuleMacroInfo(II, *Info); 717 return Info; 718 } 719 720 public: 721 MacroState() : MacroState(nullptr) {} 722 MacroState(MacroDirective *MD) : State(MD) {} 723 724 MacroState(MacroState &&O) noexcept : State(O.State) { 725 O.State = (MacroDirective *)nullptr; 726 } 727 728 MacroState &operator=(MacroState &&O) noexcept { 729 auto S = O.State; 730 O.State = (MacroDirective *)nullptr; 731 State = S; 732 return *this; 733 } 734 735 ~MacroState() { 736 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 737 Info->~ModuleMacroInfo(); 738 } 739 740 MacroDirective *getLatest() const { 741 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 742 return Info->MD; 743 return State.get<MacroDirective*>(); 744 } 745 746 void setLatest(MacroDirective *MD) { 747 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 748 Info->MD = MD; 749 else 750 State = MD; 751 } 752 753 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 754 auto *Info = getModuleInfo(PP, II); 755 return Info ? 
Info->IsAmbiguous : false; 756 } 757 758 ArrayRef<ModuleMacro *> 759 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 760 if (auto *Info = getModuleInfo(PP, II)) 761 return Info->ActiveModuleMacros; 762 return None; 763 } 764 765 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 766 SourceManager &SourceMgr) const { 767 // FIXME: Incorporate module macros into the result of this. 768 if (auto *Latest = getLatest()) 769 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 770 return {}; 771 } 772 773 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 774 if (auto *Info = getModuleInfo(PP, II)) { 775 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 776 Info->ActiveModuleMacros.begin(), 777 Info->ActiveModuleMacros.end()); 778 Info->ActiveModuleMacros.clear(); 779 Info->IsAmbiguous = false; 780 } 781 } 782 783 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 784 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 785 return Info->OverriddenMacros; 786 return None; 787 } 788 789 void setOverriddenMacros(Preprocessor &PP, 790 ArrayRef<ModuleMacro *> Overrides) { 791 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 792 if (!Info) { 793 if (Overrides.empty()) 794 return; 795 Info = new (PP.getPreprocessorAllocator()) 796 ModuleMacroInfo(State.get<MacroDirective *>()); 797 State = Info; 798 } 799 Info->OverriddenMacros.clear(); 800 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 801 Overrides.begin(), Overrides.end()); 802 Info->ActiveModuleMacrosGeneration = 0; 803 } 804 }; 805 806 /// For each IdentifierInfo that was associated with a macro, we 807 /// keep a mapping to the history of all macro definitions and #undefs in 808 /// the reverse order (the latest one is in the head of the list). 809 /// 810 /// This mapping lives within the \p CurSubmoduleState. 
using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;

// Forward declaration: BuildingSubmoduleInfo below stores a pointer to it.
struct SubmoduleState;

/// Information about a submodule that we're currently building.
struct BuildingSubmoduleInfo {
  /// The module that we are building.
  Module *M;

  /// The location at which the module was included.
  SourceLocation ImportLoc;

  /// Whether we entered this submodule via a pragma.
  bool IsPragma;

  /// The previous SubmoduleState.
  SubmoduleState *OuterSubmoduleState;

  /// The number of pending module macro names when we started building this.
  unsigned OuterPendingModuleMacroNames;

  /// Stores every argument in the member of the same name.
  BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
                        SubmoduleState *OuterSubmoduleState,
                        unsigned OuterPendingModuleMacroNames)
      : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
        OuterSubmoduleState(OuterSubmoduleState),
        OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
};
/// Stack of BuildingSubmoduleInfo records.
// NOTE(review): presumably one entry per submodule currently being built,
// innermost last — confirm against the code that pushes and pops it.
SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;

/// Information about a submodule's preprocessor state.
struct SubmoduleState {
  /// The macros for the submodule.
  MacroMap Macros;

  /// The set of modules that are visible within the submodule.
  VisibleModuleSet VisibleModules;

  // FIXME: CounterValue?
  // FIXME: PragmaPushMacroInfo?
};
/// Preprocessor state per submodule, keyed by the Module pointer.
std::map<Module *, SubmoduleState> Submodules;

/// The preprocessor state for preprocessing outside of any submodule.
SubmoduleState NullSubmoduleState;

/// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
SubmoduleState *CurSubmoduleState;

/// The files that have been included.
IncludedFilesSet IncludedFiles;

/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;

/// The names of potential module macros that we've not yet processed.
868 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames; 869 870 /// The list of module macros, for each identifier, that are not overridden by 871 /// any other module macro. 872 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 873 LeafModuleMacros; 874 875 /// Macros that we want to warn because they are not used at the end 876 /// of the translation unit. 877 /// 878 /// We store just their SourceLocations instead of 879 /// something like MacroInfo*. The benefit of this is that when we are 880 /// deserializing from PCH, we don't need to deserialize identifier & macros 881 /// just so that we can report that they are unused, we just warn using 882 /// the SourceLocations of this set (that will be filled by the ASTReader). 883 using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>; 884 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 885 886 /// This is a pair of an optional message and source location used for pragmas 887 /// that annotate macros like pragma clang restrict_expansion and pragma clang 888 /// deprecated. This pair stores the optional message and the location of the 889 /// annotation pragma for use producing diagnostics and notes. 
890 using MsgLocationPair = std::pair<std::string, SourceLocation>; 891 892 struct MacroAnnotationInfo { 893 SourceLocation Location; 894 std::string Message; 895 }; 896 897 struct MacroAnnotations { 898 llvm::Optional<MacroAnnotationInfo> DeprecationInfo; 899 llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo; 900 llvm::Optional<SourceLocation> FinalAnnotationLoc; 901 902 static MacroAnnotations makeDeprecation(SourceLocation Loc, 903 std::string Msg) { 904 return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)}, 905 llvm::None, llvm::None}; 906 } 907 908 static MacroAnnotations makeRestrictExpansion(SourceLocation Loc, 909 std::string Msg) { 910 return MacroAnnotations{ 911 llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None}; 912 } 913 914 static MacroAnnotations makeFinal(SourceLocation Loc) { 915 return MacroAnnotations{llvm::None, llvm::None, Loc}; 916 } 917 }; 918 919 /// Warning information for macro annotations. 920 llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos; 921 922 /// A "freelist" of MacroArg objects that can be 923 /// reused for quick allocation. 924 MacroArgs *MacroArgCache = nullptr; 925 926 /// For each IdentifierInfo used in a \#pragma push_macro directive, 927 /// we keep a MacroInfo stack used to restore the previous macro value. 928 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 929 PragmaPushMacroInfo; 930 931 // Various statistics we track for performance analysis. 
932 unsigned NumDirectives = 0; 933 unsigned NumDefined = 0; 934 unsigned NumUndefined = 0; 935 unsigned NumPragma = 0; 936 unsigned NumIf = 0; 937 unsigned NumElse = 0; 938 unsigned NumEndif = 0; 939 unsigned NumEnteredSourceFiles = 0; 940 unsigned MaxIncludeStackDepth = 0; 941 unsigned NumMacroExpanded = 0; 942 unsigned NumFnMacroExpanded = 0; 943 unsigned NumBuiltinMacroExpanded = 0; 944 unsigned NumFastMacroExpanded = 0; 945 unsigned NumTokenPaste = 0; 946 unsigned NumFastTokenPaste = 0; 947 unsigned NumSkipped = 0; 948 949 /// The predefined macros that preprocessor should use from the 950 /// command line etc. 951 std::string Predefines; 952 953 /// The file ID for the preprocessor predefines. 954 FileID PredefinesFileID; 955 956 /// The file ID for the PCH through header. 957 FileID PCHThroughHeaderFileID; 958 959 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 960 bool SkippingUntilPragmaHdrStop = false; 961 962 /// Whether tokens are being skipped until the through header is seen. 963 bool SkippingUntilPCHThroughHeader = false; 964 965 /// \{ 966 /// Cache of macro expanders to reduce malloc traffic. 967 enum { TokenLexerCacheSize = 8 }; 968 unsigned NumCachedTokenLexers; 969 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 970 /// \} 971 972 /// Keeps macro expanded tokens for TokenLexers. 973 // 974 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 975 /// going to lex in the cache and when it finishes the tokens are removed 976 /// from the end of the cache. 977 SmallVector<Token, 16> MacroExpandedTokens; 978 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 979 980 /// A record of the macro definitions and expansions that 981 /// occurred during preprocessing. 982 /// 983 /// This is an optional side structure that can be enabled with 984 /// \c createPreprocessingRecord() prior to preprocessing. 
985 PreprocessingRecord *Record = nullptr; 986 987 /// Cached tokens state. 988 using CachedTokensTy = SmallVector<Token, 1>; 989 990 /// Cached tokens are stored here when we do backtracking or 991 /// lookahead. They are "lexed" by the CachingLex() method. 992 CachedTokensTy CachedTokens; 993 994 /// The position of the cached token that CachingLex() should 995 /// "lex" next. 996 /// 997 /// If it points beyond the CachedTokens vector, it means that a normal 998 /// Lex() should be invoked. 999 CachedTokensTy::size_type CachedLexPos = 0; 1000 1001 /// Stack of backtrack positions, allowing nested backtracks. 1002 /// 1003 /// The EnableBacktrackAtThisPos() method pushes a position to 1004 /// indicate where CachedLexPos should be set when the BackTrack() method is 1005 /// invoked (at which point the last position is popped). 1006 std::vector<CachedTokensTy::size_type> BacktrackPositions; 1007 1008 struct MacroInfoChain { 1009 MacroInfo MI; 1010 MacroInfoChain *Next; 1011 }; 1012 1013 /// MacroInfos are managed as a chain for easy disposal. This is the head 1014 /// of that list. 1015 MacroInfoChain *MIChainHead = nullptr; 1016 1017 /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running. 1018 /// This is used to guard against calling this function recursively. 1019 /// 1020 /// See comments at the use-site for more context about why it is needed. 1021 bool SkippingExcludedConditionalBlock = false; 1022 1023 /// Keeps track of skipped range mappings that were recorded while skipping 1024 /// excluded conditional directives. It maps the source buffer pointer at 1025 /// the beginning of a skipped block, to the number of bytes that should be 1026 /// skipped. 
1027 llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges; 1028 1029 void updateOutOfDateIdentifier(IdentifierInfo &II) const; 1030 1031 public: 1032 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 1033 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM, 1034 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 1035 IdentifierInfoLookup *IILookup = nullptr, 1036 bool OwnsHeaderSearch = false, 1037 TranslationUnitKind TUKind = TU_Complete); 1038 1039 ~Preprocessor(); 1040 1041 /// Initialize the preprocessor using information about the target. 1042 /// 1043 /// \param Target is owned by the caller and must remain valid for the 1044 /// lifetime of the preprocessor. 1045 /// \param AuxTarget is owned by the caller and must remain valid for 1046 /// the lifetime of the preprocessor. 1047 void Initialize(const TargetInfo &Target, 1048 const TargetInfo *AuxTarget = nullptr); 1049 1050 /// Initialize the preprocessor to parse a model file 1051 /// 1052 /// To parse model files the preprocessor of the original source is reused to 1053 /// preserver the identifier table. However to avoid some duplicate 1054 /// information in the preprocessor some cleanup is needed before it is used 1055 /// to parse model files. This method does that cleanup. 1056 void InitializeForModelFile(); 1057 1058 /// Cleanup after model file parsing 1059 void FinalizeForModelFile(); 1060 1061 /// Retrieve the preprocessor options used to initialize this 1062 /// preprocessor. 
1063 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 1064 1065 DiagnosticsEngine &getDiagnostics() const { return *Diags; } 1066 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 1067 1068 const LangOptions &getLangOpts() const { return LangOpts; } 1069 const TargetInfo &getTargetInfo() const { return *Target; } 1070 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } 1071 FileManager &getFileManager() const { return FileMgr; } 1072 SourceManager &getSourceManager() const { return SourceMgr; } 1073 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 1074 1075 IdentifierTable &getIdentifierTable() { return Identifiers; } 1076 const IdentifierTable &getIdentifierTable() const { return Identifiers; } 1077 SelectorTable &getSelectorTable() { return Selectors; } 1078 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } 1079 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 1080 1081 void setExternalSource(ExternalPreprocessorSource *Source) { 1082 ExternalSource = Source; 1083 } 1084 1085 ExternalPreprocessorSource *getExternalSource() const { 1086 return ExternalSource; 1087 } 1088 1089 /// Retrieve the module loader associated with this preprocessor. 1090 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 1091 1092 bool hadModuleLoaderFatalFailure() const { 1093 return TheModuleLoader.HadFatalFailure; 1094 } 1095 1096 /// Retrieve the number of Directives that have been processed by the 1097 /// Preprocessor. 1098 unsigned getNumDirectives() const { 1099 return NumDirectives; 1100 } 1101 1102 /// True if we are currently preprocessing a #if or #elif directive 1103 bool isParsingIfOrElifDirective() const { 1104 return ParsingIfOrElifDirective; 1105 } 1106 1107 /// Control whether the preprocessor retains comments in output. 
1108 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 1109 this->KeepComments = KeepComments | KeepMacroComments; 1110 this->KeepMacroComments = KeepMacroComments; 1111 } 1112 1113 bool getCommentRetentionState() const { return KeepComments; } 1114 1115 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } 1116 bool getPragmasEnabled() const { return PragmasEnabled; } 1117 1118 void SetSuppressIncludeNotFoundError(bool Suppress) { 1119 SuppressIncludeNotFoundError = Suppress; 1120 } 1121 1122 bool GetSuppressIncludeNotFoundError() { 1123 return SuppressIncludeNotFoundError; 1124 } 1125 1126 /// Sets whether the preprocessor is responsible for producing output or if 1127 /// it is producing tokens to be consumed by Parse and Sema. 1128 void setPreprocessedOutput(bool IsPreprocessedOutput) { 1129 PreprocessedOutput = IsPreprocessedOutput; 1130 } 1131 1132 /// Returns true if the preprocessor is responsible for generating output, 1133 /// false if it is producing tokens to be consumed by Parse and Sema. 1134 bool isPreprocessedOutput() const { return PreprocessedOutput; } 1135 1136 /// Return true if we are lexing directly from the specified lexer. 1137 bool isCurrentLexer(const PreprocessorLexer *L) const { 1138 return CurPPLexer == L; 1139 } 1140 1141 /// Return the current lexer being lexed from. 1142 /// 1143 /// Note that this ignores any potentially active macro expansions and _Pragma 1144 /// expansions going on at the time. 1145 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1146 1147 /// Return the current file lexer being lexed from. 1148 /// 1149 /// Note that this ignores any potentially active macro expansions and _Pragma 1150 /// expansions going on at the time. 1151 PreprocessorLexer *getCurrentFileLexer() const; 1152 1153 /// Return the submodule owning the file being lexed. This may not be 1154 /// the current module if we have changed modules since entering the file. 
1155 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1156 1157 /// Returns the FileID for the preprocessor predefines. 1158 FileID getPredefinesFileID() const { return PredefinesFileID; } 1159 1160 /// \{ 1161 /// Accessors for preprocessor callbacks. 1162 /// 1163 /// Note that this class takes ownership of any PPCallbacks object given to 1164 /// it. 1165 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } 1166 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1167 if (Callbacks) 1168 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1169 std::move(Callbacks)); 1170 Callbacks = std::move(C); 1171 } 1172 /// \} 1173 1174 /// Get the number of tokens processed so far. 1175 unsigned getTokenCount() const { return TokenCount; } 1176 1177 /// Get the max number of tokens before issuing a -Wmax-tokens warning. 1178 unsigned getMaxTokens() const { return MaxTokens; } 1179 1180 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1181 MaxTokens = Value; 1182 MaxTokensOverrideLoc = Loc; 1183 }; 1184 1185 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1186 1187 /// Register a function that would be called on each token in the final 1188 /// expanded token stream. 1189 /// This also reports annotation tokens produced by the parser. 1190 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1191 OnToken = std::move(F); 1192 } 1193 1194 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1195 1196 bool isMacroDefined(StringRef Id) { 1197 return isMacroDefined(&Identifiers.get(Id)); 1198 } 1199 bool isMacroDefined(const IdentifierInfo *II) { 1200 return II->hasMacroDefinition() && 1201 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1202 } 1203 1204 /// Determine whether II is defined as a macro within the module M, 1205 /// if that is a module that we've already preprocessed. Does not check for 1206 /// macros imported into M. 
1207 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1208 if (!II->hasMacroDefinition()) 1209 return false; 1210 auto I = Submodules.find(M); 1211 if (I == Submodules.end()) 1212 return false; 1213 auto J = I->second.Macros.find(II); 1214 if (J == I->second.Macros.end()) 1215 return false; 1216 auto *MD = J->second.getLatest(); 1217 return MD && MD->isDefined(); 1218 } 1219 1220 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1221 if (!II->hasMacroDefinition()) 1222 return {}; 1223 1224 MacroState &S = CurSubmoduleState->Macros[II]; 1225 auto *MD = S.getLatest(); 1226 while (MD && isa<VisibilityMacroDirective>(MD)) 1227 MD = MD->getPrevious(); 1228 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1229 S.getActiveModuleMacros(*this, II), 1230 S.isAmbiguous(*this, II)); 1231 } 1232 1233 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1234 SourceLocation Loc) { 1235 if (!II->hadMacroDefinition()) 1236 return {}; 1237 1238 MacroState &S = CurSubmoduleState->Macros[II]; 1239 MacroDirective::DefInfo DI; 1240 if (auto *MD = S.getLatest()) 1241 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1242 // FIXME: Compute the set of active module macros at the specified location. 1243 return MacroDefinition(DI.getDirective(), 1244 S.getActiveModuleMacros(*this, II), 1245 S.isAmbiguous(*this, II)); 1246 } 1247 1248 /// Given an identifier, return its latest non-imported MacroDirective 1249 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
1250 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1251 if (!II->hasMacroDefinition()) 1252 return nullptr; 1253 1254 auto *MD = getLocalMacroDirectiveHistory(II); 1255 if (!MD || MD->getDefinition().isUndefined()) 1256 return nullptr; 1257 1258 return MD; 1259 } 1260 1261 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1262 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1263 } 1264 1265 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1266 if (!II->hasMacroDefinition()) 1267 return nullptr; 1268 if (auto MD = getMacroDefinition(II)) 1269 return MD.getMacroInfo(); 1270 return nullptr; 1271 } 1272 1273 /// Given an identifier, return the latest non-imported macro 1274 /// directive for that identifier. 1275 /// 1276 /// One can iterate over all previous macro directives from the most recent 1277 /// one. 1278 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1279 1280 /// Add a directive to the macro directive history for this identifier. 1281 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); 1282 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1283 SourceLocation Loc) { 1284 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1285 appendMacroDirective(II, MD); 1286 return MD; 1287 } 1288 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1289 MacroInfo *MI) { 1290 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1291 } 1292 1293 /// Set a MacroDirective that was loaded from a PCH file. 1294 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1295 MacroDirective *MD); 1296 1297 /// Register an exported macro for a module and identifier. 
1298 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 1299 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1300 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1301 1302 /// Get the list of leaf (non-overridden) module macros for a name. 1303 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1304 if (II->isOutOfDate()) 1305 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 1306 auto I = LeafModuleMacros.find(II); 1307 if (I != LeafModuleMacros.end()) 1308 return I->second; 1309 return None; 1310 } 1311 1312 /// Get the list of submodules that we're currently building. 1313 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1314 return BuildingSubmoduleStack; 1315 } 1316 1317 /// \{ 1318 /// Iterators for the macro history table. Currently defined macros have 1319 /// IdentifierInfo::hasMacroDefinition() set and an empty 1320 /// MacroInfo::getUndefLoc() at the head of the list. 1321 using macro_iterator = MacroMap::const_iterator; 1322 1323 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1324 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1325 1326 llvm::iterator_range<macro_iterator> 1327 macros(bool IncludeExternalMacros = true) const { 1328 macro_iterator begin = macro_begin(IncludeExternalMacros); 1329 macro_iterator end = macro_end(IncludeExternalMacros); 1330 return llvm::make_range(begin, end); 1331 } 1332 1333 /// \} 1334 1335 /// Mark the file as included. 1336 /// Returns true if this is the first time the file was included. 1337 bool markIncluded(const FileEntry *File) { 1338 HeaderInfo.getFileInfo(File); 1339 return IncludedFiles.insert(File).second; 1340 } 1341 1342 /// Return true if this header has already been included. 1343 bool alreadyIncluded(const FileEntry *File) const { 1344 return IncludedFiles.count(File); 1345 } 1346 1347 /// Get the set of included files. 
1348 IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } 1349 const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } 1350 1351 /// Return the name of the macro defined before \p Loc that has 1352 /// spelling \p Tokens. If there are multiple macros with same spelling, 1353 /// return the last one defined. 1354 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1355 ArrayRef<TokenValue> Tokens) const; 1356 1357 /// Set the predefines for this Preprocessor. 1358 /// 1359 /// These predefines are automatically injected when parsing the main file. 1360 void setPredefines(std::string P) { Predefines = std::move(P); } 1361 1362 /// Return information about the specified preprocessor 1363 /// identifier token. 1364 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1365 return &Identifiers.get(Name); 1366 } 1367 1368 /// Add the specified pragma handler to this preprocessor. 1369 /// 1370 /// If \p Namespace is non-null, then it is a token required to exist on the 1371 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1372 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1373 void AddPragmaHandler(PragmaHandler *Handler) { 1374 AddPragmaHandler(StringRef(), Handler); 1375 } 1376 1377 /// Remove the specific pragma handler from this preprocessor. 1378 /// 1379 /// If \p Namespace is non-null, then it should be the namespace that 1380 /// \p Handler was added to. It is an error to remove a handler that 1381 /// has not been registered. 1382 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); 1383 void RemovePragmaHandler(PragmaHandler *Handler) { 1384 RemovePragmaHandler(StringRef(), Handler); 1385 } 1386 1387 /// Install empty handlers for all pragmas (making them ignored). 1388 void IgnorePragmas(); 1389 1390 /// Set empty line handler. 
1391 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1392 1393 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1394 1395 /// Add the specified comment handler to the preprocessor. 1396 void addCommentHandler(CommentHandler *Handler); 1397 1398 /// Remove the specified comment handler. 1399 /// 1400 /// It is an error to remove a handler that has not been registered. 1401 void removeCommentHandler(CommentHandler *Handler); 1402 1403 /// Set the code completion handler to the given object. 1404 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1405 CodeComplete = &Handler; 1406 } 1407 1408 /// Retrieve the current code-completion handler. 1409 CodeCompletionHandler *getCodeCompletionHandler() const { 1410 return CodeComplete; 1411 } 1412 1413 /// Clear out the code completion handler. 1414 void clearCodeCompletionHandler() { 1415 CodeComplete = nullptr; 1416 } 1417 1418 /// Hook used by the lexer to invoke the "included file" code 1419 /// completion point. 1420 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1421 1422 /// Hook used by the lexer to invoke the "natural language" code 1423 /// completion point. 1424 void CodeCompleteNaturalLanguage(); 1425 1426 /// Set the code completion token for filtering purposes. 1427 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1428 CodeCompletionII = Filter; 1429 } 1430 1431 /// Set the code completion token range for detecting replacement range later 1432 /// on. 1433 void setCodeCompletionTokenRange(const SourceLocation Start, 1434 const SourceLocation End) { 1435 CodeCompletionTokenRange = {Start, End}; 1436 } 1437 SourceRange getCodeCompletionTokenRange() const { 1438 return CodeCompletionTokenRange; 1439 } 1440 1441 /// Get the code completion token for filtering purposes. 
1442 StringRef getCodeCompletionFilter() { 1443 if (CodeCompletionII) 1444 return CodeCompletionII->getName(); 1445 return {}; 1446 } 1447 1448 /// Retrieve the preprocessing record, or NULL if there is no 1449 /// preprocessing record. 1450 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1451 1452 /// Create a new preprocessing record, which will keep track of 1453 /// all macro expansions, macro definitions, etc. 1454 void createPreprocessingRecord(); 1455 1456 /// Returns true if the FileEntry is the PCH through header. 1457 bool isPCHThroughHeader(const FileEntry *FE); 1458 1459 /// True if creating a PCH with a through header. 1460 bool creatingPCHWithThroughHeader(); 1461 1462 /// True if using a PCH with a through header. 1463 bool usingPCHWithThroughHeader(); 1464 1465 /// True if creating a PCH with a #pragma hdrstop. 1466 bool creatingPCHWithPragmaHdrStop(); 1467 1468 /// True if using a PCH with a #pragma hdrstop. 1469 bool usingPCHWithPragmaHdrStop(); 1470 1471 /// Skip tokens until after the #include of the through header or 1472 /// until after a #pragma hdrstop. 1473 void SkipTokensWhileUsingPCH(); 1474 1475 /// Process directives while skipping until the through header or 1476 /// #pragma hdrstop is found. 1477 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1478 SourceLocation HashLoc); 1479 1480 /// Enter the specified FileID as the main source file, 1481 /// which implicitly adds the builtin defines etc. 1482 void EnterMainSourceFile(); 1483 1484 /// Inform the preprocessor callbacks that processing is complete. 1485 void EndSourceFile(); 1486 1487 /// Add a source file to the top of the include stack and 1488 /// start lexing tokens from it instead of the current buffer. 1489 /// 1490 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 
1491 bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, 1492 SourceLocation Loc, bool IsFirstIncludeOfFile = true); 1493 1494 /// Add a Macro to the top of the include stack and start lexing 1495 /// tokens from it instead of the current buffer. 1496 /// 1497 /// \param Args specifies the tokens input to a function-like macro. 1498 /// \param ILEnd specifies the location of the ')' for a function-like macro 1499 /// or the identifier for an object-like macro. 1500 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1501 MacroArgs *Args); 1502 1503 private: 1504 /// Add a "macro" context to the top of the include stack, 1505 /// which will cause the lexer to start returning the specified tokens. 1506 /// 1507 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1508 /// will not be subject to further macro expansion. Otherwise, these tokens 1509 /// will be re-macro-expanded when/if expansion is enabled. 1510 /// 1511 /// If \p OwnsTokens is false, this method assumes that the specified stream 1512 /// of tokens has a permanent owner somewhere, so they do not need to be 1513 /// copied. If it is true, it assumes the array of tokens is allocated with 1514 /// \c new[] and the Preprocessor will delete[] it. 1515 /// 1516 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1517 /// set, see the flag documentation for details. 
1518 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1519 bool DisableMacroExpansion, bool OwnsTokens, 1520 bool IsReinject); 1521 1522 public: 1523 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1524 bool DisableMacroExpansion, bool IsReinject) { 1525 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1526 IsReinject); 1527 } 1528 1529 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1530 bool IsReinject) { 1531 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1532 IsReinject); 1533 } 1534 1535 /// Pop the current lexer/macro exp off the top of the lexer stack. 1536 /// 1537 /// This should only be used in situations where the current state of the 1538 /// top-of-stack lexer is known. 1539 void RemoveTopOfLexerStack(); 1540 1541 /// From the point that this method is called, and until 1542 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1543 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1544 /// make the Preprocessor re-lex the same tokens. 1545 /// 1546 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1547 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1548 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1549 /// 1550 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1551 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1552 /// tokens will continue indefinitely. 1553 /// 1554 void EnableBacktrackAtThisPos(); 1555 1556 /// Disable the last EnableBacktrackAtThisPos call. 1557 void CommitBacktrackedTokens(); 1558 1559 /// Make Preprocessor re-lex the tokens that were lexed since 1560 /// EnableBacktrackAtThisPos() was previously called. 1561 void Backtrack(); 1562 1563 /// True if EnableBacktrackAtThisPos() was called and 1564 /// caching of tokens is on. 
1565 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1566 1567 /// Lex the next token for this preprocessor. 1568 void Lex(Token &Result); 1569 1570 /// Lex a token, forming a header-name token if possible. 1571 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1572 1573 bool LexAfterModuleImport(Token &Result); 1574 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1575 1576 void makeModuleVisible(Module *M, SourceLocation Loc); 1577 1578 SourceLocation getModuleImportLoc(Module *M) const { 1579 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1580 } 1581 1582 /// Lex a string literal, which may be the concatenation of multiple 1583 /// string literals and may even come from macro expansion. 1584 /// \returns true on success, false if a error diagnostic has been generated. 1585 bool LexStringLiteral(Token &Result, std::string &String, 1586 const char *DiagnosticTag, bool AllowMacroExpansion) { 1587 if (AllowMacroExpansion) 1588 Lex(Result); 1589 else 1590 LexUnexpandedToken(Result); 1591 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1592 AllowMacroExpansion); 1593 } 1594 1595 /// Complete the lexing of a string literal where the first token has 1596 /// already been lexed (see LexStringLiteral). 1597 bool FinishLexStringLiteral(Token &Result, std::string &String, 1598 const char *DiagnosticTag, 1599 bool AllowMacroExpansion); 1600 1601 /// Lex a token. If it's a comment, keep lexing until we get 1602 /// something not a comment. 1603 /// 1604 /// This is useful in -E -C mode where comments would foul up preprocessor 1605 /// directive handling. 1606 void LexNonComment(Token &Result) { 1607 do 1608 Lex(Result); 1609 while (Result.getKind() == tok::comment); 1610 } 1611 1612 /// Just like Lex, but disables macro expansion of identifier tokens. 1613 void LexUnexpandedToken(Token &Result) { 1614 // Disable macro expansion. 
1615 bool OldVal = DisableMacroExpansion; 1616 DisableMacroExpansion = true; 1617 // Lex the token. 1618 Lex(Result); 1619 1620 // Reenable it. 1621 DisableMacroExpansion = OldVal; 1622 } 1623 1624 /// Like LexNonComment, but this disables macro expansion of 1625 /// identifier tokens. 1626 void LexUnexpandedNonComment(Token &Result) { 1627 do 1628 LexUnexpandedToken(Result); 1629 while (Result.getKind() == tok::comment); 1630 } 1631 1632 /// Parses a simple integer literal to get its numeric value. Floating 1633 /// point literals and user defined literals are rejected. Used primarily to 1634 /// handle pragmas that accept integer arguments. 1635 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1636 1637 /// Disables macro expansion everywhere except for preprocessor directives. 1638 void SetMacroExpansionOnlyInDirectives() { 1639 DisableMacroExpansion = true; 1640 MacroExpansionInDirectivesOverride = true; 1641 } 1642 1643 /// Peeks ahead N tokens and returns that token without consuming any 1644 /// tokens. 1645 /// 1646 /// LookAhead(0) returns the next token that would be returned by Lex(), 1647 /// LookAhead(1) returns the token after it, etc. This returns normal 1648 /// tokens after phase 5. As such, it is equivalent to using 1649 /// 'Lex', not 'LexUnexpandedToken'. 1650 const Token &LookAhead(unsigned N) { 1651 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1652 if (CachedLexPos + N < CachedTokens.size()) 1653 return CachedTokens[CachedLexPos+N]; 1654 else 1655 return PeekAhead(N+1); 1656 } 1657 1658 /// When backtracking is enabled and tokens are cached, 1659 /// this allows to revert a specific number of tokens. 1660 /// 1661 /// Note that the number of tokens being reverted should be up to the last 1662 /// backtrack position, not more. 
1663 void RevertCachedTokens(unsigned N) { 1664 assert(isBacktrackEnabled() && 1665 "Should only be called when tokens are cached for backtracking"); 1666 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1667 && "Should revert tokens up to the last backtrack position, not more"); 1668 assert(signed(CachedLexPos) - signed(N) >= 0 && 1669 "Corrupted backtrack positions ?"); 1670 CachedLexPos -= N; 1671 } 1672 1673 /// Enters a token in the token stream to be lexed next. 1674 /// 1675 /// If BackTrack() is called afterwards, the token will remain at the 1676 /// insertion point. 1677 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1678 /// flag set. See the flag documentation for details. 1679 void EnterToken(const Token &Tok, bool IsReinject) { 1680 if (LexLevel) { 1681 // It's not correct in general to enter caching lex mode while in the 1682 // middle of a nested lexing action. 1683 auto TokCopy = std::make_unique<Token[]>(1); 1684 TokCopy[0] = Tok; 1685 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1686 } else { 1687 EnterCachingLexMode(); 1688 assert(IsReinject && "new tokens in the middle of cached stream"); 1689 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1690 } 1691 } 1692 1693 /// We notify the Preprocessor that if it is caching tokens (because 1694 /// backtrack is enabled) it should replace the most recent cached tokens 1695 /// with the given annotation token. This function has no effect if 1696 /// backtracking is not enabled. 1697 /// 1698 /// Note that the use of this function is just for optimization, so that the 1699 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1700 /// invoked. 
1701 void AnnotateCachedTokens(const Token &Tok) { 1702 assert(Tok.isAnnotation() && "Expected annotation token"); 1703 if (CachedLexPos != 0 && isBacktrackEnabled()) 1704 AnnotatePreviousCachedTokens(Tok); 1705 } 1706 1707 /// Get the location of the last cached token, suitable for setting the end 1708 /// location of an annotation token. 1709 SourceLocation getLastCachedTokenLocation() const { 1710 assert(CachedLexPos != 0); 1711 return CachedTokens[CachedLexPos-1].getLastLoc(); 1712 } 1713 1714 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1715 /// CachedTokens. 1716 bool IsPreviousCachedToken(const Token &Tok) const; 1717 1718 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1719 /// in \p NewToks. 1720 /// 1721 /// Useful when a token needs to be split in smaller ones and CachedTokens 1722 /// most recent token must to be updated to reflect that. 1723 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1724 1725 /// Replace the last token with an annotation token. 1726 /// 1727 /// Like AnnotateCachedTokens(), this routine replaces an 1728 /// already-parsed (and resolved) token with an annotation 1729 /// token. However, this routine only replaces the last token with 1730 /// the annotation token; it does not affect any other cached 1731 /// tokens. This function has no effect if backtracking is not 1732 /// enabled. 1733 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1734 assert(Tok.isAnnotation() && "Expected annotation token"); 1735 if (CachedLexPos != 0 && isBacktrackEnabled()) 1736 CachedTokens[CachedLexPos-1] = Tok; 1737 } 1738 1739 /// Enter an annotation token into the token stream. 1740 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1741 void *AnnotationVal); 1742 1743 /// Determine whether it's possible for a future call to Lex to produce an 1744 /// annotation token created by a previous call to EnterAnnotationToken. 
1745 bool mightHavePendingAnnotationTokens() { 1746 return CurLexerKind != CLK_Lexer; 1747 } 1748 1749 /// Update the current token to represent the provided 1750 /// identifier, in order to cache an action performed by typo correction. 1751 void TypoCorrectToken(const Token &Tok) { 1752 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1753 if (CachedLexPos != 0 && isBacktrackEnabled()) 1754 CachedTokens[CachedLexPos-1] = Tok; 1755 } 1756 1757 /// Recompute the current lexer kind based on the CurLexer/ 1758 /// CurTokenLexer pointers. 1759 void recomputeCurLexerKind(); 1760 1761 /// Returns true if incremental processing is enabled 1762 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1763 1764 /// Enables the incremental processing 1765 void enableIncrementalProcessing(bool value = true) { 1766 IncrementalProcessing = value; 1767 } 1768 1769 /// Specify the point at which code-completion will be performed. 1770 /// 1771 /// \param File the file in which code completion should occur. If 1772 /// this file is included multiple times, code-completion will 1773 /// perform completion the first time it is included. If NULL, this 1774 /// function clears out the code-completion point. 1775 /// 1776 /// \param Line the line at which code completion should occur 1777 /// (1-based). 1778 /// 1779 /// \param Column the column at which code completion should occur 1780 /// (1-based). 1781 /// 1782 /// \returns true if an error occurred, false otherwise. 1783 bool SetCodeCompletionPoint(const FileEntry *File, 1784 unsigned Line, unsigned Column); 1785 1786 /// Determine if we are performing code completion. 1787 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1788 1789 /// Returns the location of the code-completion point. 1790 /// 1791 /// Returns an invalid location if code-completion is not enabled or the file 1792 /// containing the code-completion point has not been lexed yet. 
SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }

/// Returns the start location of the file of code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionFileLoc() const {
  return CodeCompletionFileLoc;
}

/// Returns true if code-completion is enabled and we have hit the
/// code-completion point.
bool isCodeCompletionReached() const { return CodeCompletionReached; }

/// Note that we hit the code-completion point.
///
/// Must only be called while code completion is enabled. Also suppresses
/// all further diagnostics.
void setCodeCompletionReached() {
  assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
  CodeCompletionReached = true;
  // Silence any diagnostics that occur after we hit the code-completion.
  getDiagnostics().setSuppressAllDiagnostics(true);
}

/// The identifier and location recorded for the currently-active
/// \#pragma clang arc_cf_code_audited begin.
///
/// The location is invalid if there is no such pragma active.
std::pair<IdentifierInfo *, SourceLocation>
getPragmaARCCFCodeAuditedInfo() const {
  return PragmaARCCFCodeAuditedInfo;
}

/// Set the identifier and location of the currently-active \#pragma clang
/// arc_cf_code_audited begin. An invalid location ends the pragma.
void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
                                   SourceLocation Loc) {
  PragmaARCCFCodeAuditedInfo = {Ident, Loc};
}

/// The location of the currently-active \#pragma clang
/// assume_nonnull begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaAssumeNonNullLoc() const {
  return PragmaAssumeNonNullLoc;
}

/// Set the location of the currently-active \#pragma clang
/// assume_nonnull begin. An invalid location ends the pragma.
void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
  PragmaAssumeNonNullLoc = Loc;
}

/// Get the location of the recorded unterminated \#pragma clang
/// assume_nonnull begin in the preamble, if one exists.
///
/// Returns an invalid location if the preamble did not end with
/// such a pragma active or if there is no recorded preamble.
SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
  return PreambleRecordedPragmaAssumeNonNullLoc;
}

/// Record the location of the unterminated \#pragma clang
/// assume_nonnull begin in the preamble.
void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
  PreambleRecordedPragmaAssumeNonNullLoc = Loc;
}

/// Set the directory in which the main file should be considered
/// to have been found, if it is not a real file.
void setMainFileDir(const DirectoryEntry *Dir) {
  MainFileDir = Dir;
}

/// Instruct the preprocessor to skip part of the main source file.
///
/// \param Bytes The number of bytes in the preamble to skip.
///
/// \param StartOfLine Whether skipping these bytes puts the lexer at the
/// start of a line.
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
  SkipMainFilePreamble.first = Bytes;
  SkipMainFilePreamble.second = StartOfLine;
}

/// Forwarding function for diagnostics. This emits a diagnostic at
/// the specified Token's location, translating the token's start
/// position in the current buffer into a SourcePosition object for rendering.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
  return Diags->Report(Loc, DiagID);
}

/// Overload that reports the diagnostic at \p Tok's location.
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
  return Diags->Report(Tok.getLocation(), DiagID);
}

/// Return the 'spelling' of the token at the given
/// location; does not go up to the spelling location or down to the
/// expansion location.
///
/// \param buffer A buffer which will be used only if the token requires
///   "cleaning", e.g. if it contains trigraphs or escaped newlines
/// \param invalid If non-null, will be set \c true if an error occurs.
StringRef getSpelling(SourceLocation loc,
                      SmallVectorImpl<char> &buffer,
                      bool *invalid = nullptr) const {
  return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
}

/// Return the 'spelling' of the Tok token.
///
/// The spelling of a token is the characters used to represent the token in
/// the source file after trigraph expansion and escaped-newline folding. In
/// particular, this wants to get the true, uncanonicalized, spelling of
/// things like digraphs, UCNs, etc.
///
/// \param Invalid If non-null, will be set \c true if an error occurs.
std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
  return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
}

/// Get the spelling of a token into a preallocated buffer, instead
/// of as an std::string.
///
/// The caller is required to allocate enough space for the token, which is
/// guaranteed to be at least Tok.getLength() bytes long. The length of the
/// actual result is returned.
1919 /// 1920 /// Note that this method may do two possible things: it may either fill in 1921 /// the buffer specified with characters, or it may *change the input pointer* 1922 /// to point to a constant buffer with the data already in it (avoiding a 1923 /// copy). The caller is not allowed to modify the returned buffer pointer 1924 /// if an internal buffer is returned. 1925 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1926 bool *Invalid = nullptr) const { 1927 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1928 } 1929 1930 /// Get the spelling of a token into a SmallVector. 1931 /// 1932 /// Note that the returned StringRef may not point to the 1933 /// supplied buffer if a copy can be avoided. 1934 StringRef getSpelling(const Token &Tok, 1935 SmallVectorImpl<char> &Buffer, 1936 bool *Invalid = nullptr) const; 1937 1938 /// Relex the token at the specified location. 1939 /// \returns true if there was a failure, false on success. 1940 bool getRawToken(SourceLocation Loc, Token &Result, 1941 bool IgnoreWhiteSpace = false) { 1942 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1943 } 1944 1945 /// Given a Token \p Tok that is a numeric constant with length 1, 1946 /// return the character. 1947 char 1948 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1949 bool *Invalid = nullptr) const { 1950 assert(Tok.is(tok::numeric_constant) && 1951 Tok.getLength() == 1 && "Called on unsupported token"); 1952 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1953 1954 // If the token is carrying a literal data pointer, just use it. 1955 if (const char *D = Tok.getLiteralData()) 1956 return *D; 1957 1958 // Otherwise, fall back on getCharacterData, which is slower, but always 1959 // works. 1960 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1961 } 1962 1963 /// Retrieve the name of the immediate macro expansion. 
///
/// This routine starts from a source location, and finds the name of the
/// macro responsible for its immediate expansion. It looks through any
/// intervening macro argument expansions to compute this. It returns a
/// StringRef that refers to the SourceManager-owned buffer of the source
/// where that macro name is spelled. Thus, the result shouldn't out-live
/// the SourceManager.
StringRef getImmediateMacroName(SourceLocation Loc) {
  return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
}

/// Plop the specified string into a scratch buffer and set the
/// specified token's location and length to it.
///
/// If specified, the source location provides a location of the expansion
/// point of the token.
void CreateString(StringRef Str, Token &Tok,
                  SourceLocation ExpansionLocStart = SourceLocation(),
                  SourceLocation ExpansionLocEnd = SourceLocation());

/// Split the first Length characters out of the token starting at TokLoc
/// and return a location pointing to the split token. Re-lexing from the
/// split token will return the split token rather than the original.
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);

/// Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different than it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to.
/// The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
  return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
}

/// Returns true if the given MacroID location points at the first
/// token of the macro expansion.
///
/// \param MacroBegin If non-null and function returns true, it is set to
/// begin location of the macro.
bool isAtStartOfMacroExpansion(SourceLocation loc,
                               SourceLocation *MacroBegin = nullptr) const {
  return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
                                          MacroBegin);
}

/// Returns true if the given MacroID location points at the last
/// token of the macro expansion.
///
/// \param MacroEnd If non-null and function returns true, it is set to
/// end location of the macro.
bool isAtEndOfMacroExpansion(SourceLocation loc,
                             SourceLocation *MacroEnd = nullptr) const {
  return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
}

/// Print the token to stderr, used for debugging.
void DumpToken(const Token &Tok, bool DumpFlags = false) const;
void DumpLocation(SourceLocation Loc) const;
void DumpMacro(const MacroInfo &MI) const;
void dumpMacroInfo(const IdentifierInfo *II);

/// Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
                                       unsigned Char) const {
  return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
}

/// Increment the counters for the number of token paste operations
/// performed.
2044 /// 2045 /// If fast was specified, this is a 'fast paste' case we handled. 2046 void IncrementPasteCounter(bool isFast) { 2047 if (isFast) 2048 ++NumFastTokenPaste; 2049 else 2050 ++NumTokenPaste; 2051 } 2052 2053 void PrintStats(); 2054 2055 size_t getTotalMemory() const; 2056 2057 /// When the macro expander pastes together a comment (/##/) in Microsoft 2058 /// mode, this method handles updating the current state, returning the 2059 /// token on the next source line. 2060 void HandleMicrosoftCommentPaste(Token &Tok); 2061 2062 //===--------------------------------------------------------------------===// 2063 // Preprocessor callback methods. These are invoked by a lexer as various 2064 // directives and events are found. 2065 2066 /// Given a tok::raw_identifier token, look up the 2067 /// identifier information for the token and install it into the token, 2068 /// updating the token kind accordingly. 2069 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 2070 2071 private: 2072 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 2073 2074 public: 2075 /// Specifies the reason for poisoning an identifier. 2076 /// 2077 /// If that identifier is accessed while poisoned, then this reason will be 2078 /// used instead of the default "poisoned" diagnostic. 2079 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 2080 2081 /// Display reason for poisoned identifier. 2082 void HandlePoisonedIdentifier(Token & Identifier); 2083 2084 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 2085 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 2086 if(II->isPoisoned()) { 2087 HandlePoisonedIdentifier(Identifier); 2088 } 2089 } 2090 } 2091 2092 private: 2093 /// Identifiers used for SEH handling in Borland. 
/// These are only
/// allowed in particular circumstances
// __except block
IdentifierInfo *Ident__exception_code,
               *Ident___exception_code,
               *Ident_GetExceptionCode;
// __except filter expression
IdentifierInfo *Ident__exception_info,
               *Ident___exception_info,
               *Ident_GetExceptionInfo;
// __finally
IdentifierInfo *Ident__abnormal_termination,
               *Ident___abnormal_termination,
               *Ident_AbnormalTermination;

const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);

public:
void PoisonSEHIdentifiers(bool Poison = true); // Borland

/// Callback invoked when the lexer reads an identifier and has
/// filled in the tokens IdentifierInfo member.
///
/// This callback potentially macro expands it or turns it into a named
/// token (like 'for').
///
/// \returns true if we actually computed a token, false if we need to
/// lex again.
bool HandleIdentifier(Token &Identifier);

/// Callback invoked when the lexer hits the end of the current file.
///
/// This either returns the EOF token and returns true, or
/// pops a level off the include stack and returns false, at which point the
/// client should call lex again.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);

/// Callback invoked when the current TokenLexer hits the end of its
/// token stream.
bool HandleEndOfTokenLexer(Token &Result);

/// Callback invoked when the lexer sees a # token at the start of a
/// line.
///
/// This consumes the directive, modifies the lexer/preprocessor state, and
/// advances the lexer(s) so that the next token read is the correct one.
void HandleDirective(Token &Result);

/// Ensure that the next token is a tok::eod token.
///
/// If not, emit a diagnostic and consume up until the eod.
2145 /// If \p EnableMacros is true, then we consider macros that expand to zero 2146 /// tokens as being ok. 2147 /// 2148 /// \return The location of the end of the directive (the terminating 2149 /// newline). 2150 SourceLocation CheckEndOfDirective(const char *DirType, 2151 bool EnableMacros = false); 2152 2153 /// Read and discard all tokens remaining on the current line until 2154 /// the tok::eod token is found. Returns the range of the skipped tokens. 2155 SourceRange DiscardUntilEndOfDirective(); 2156 2157 /// Returns true if the preprocessor has seen a use of 2158 /// __DATE__ or __TIME__ in the file so far. 2159 bool SawDateOrTime() const { 2160 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 2161 } 2162 unsigned getCounterValue() const { return CounterValue; } 2163 void setCounterValue(unsigned V) { CounterValue = V; } 2164 2165 LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const { 2166 assert(CurrentFPEvalMethod != LangOptions::FEM_UnsetOnCommandLine && 2167 "FPEvalMethod should be set either from command line or from the " 2168 "target info"); 2169 return CurrentFPEvalMethod; 2170 } 2171 2172 LangOptions::FPEvalMethodKind getTUFPEvalMethod() const { 2173 return TUFPEvalMethod; 2174 } 2175 2176 SourceLocation getLastFPEvalPragmaLocation() const { 2177 return LastFPEvalPragmaLocation; 2178 } 2179 2180 LangOptions::FPEvalMethodKind getLastFPEvalMethod() const { 2181 return LastFPEvalMethod; 2182 } 2183 2184 void setLastFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2185 LastFPEvalMethod = Val; 2186 } 2187 2188 void setCurrentFPEvalMethod(SourceLocation PragmaLoc, 2189 LangOptions::FPEvalMethodKind Val) { 2190 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2191 "FPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2192 // This is the location of the '#pragma float_control" where the 2193 // execution state is modifed. 
2194 LastFPEvalPragmaLocation = PragmaLoc; 2195 CurrentFPEvalMethod = Val; 2196 TUFPEvalMethod = Val; 2197 } 2198 2199 void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) { 2200 assert(Val != LangOptions::FEM_UnsetOnCommandLine && 2201 "TUPEvalMethod should never be set to FEM_UnsetOnCommandLine"); 2202 TUFPEvalMethod = Val; 2203 } 2204 2205 /// Retrieves the module that we're currently building, if any. 2206 Module *getCurrentModule(); 2207 2208 /// Allocate a new MacroInfo object with the provided SourceLocation. 2209 MacroInfo *AllocateMacroInfo(SourceLocation L); 2210 2211 /// Turn the specified lexer token into a fully checked and spelled 2212 /// filename, e.g. as an operand of \#include. 2213 /// 2214 /// The caller is expected to provide a buffer that is large enough to hold 2215 /// the spelling of the filename, but is also expected to handle the case 2216 /// when this method decides to use a different buffer. 2217 /// 2218 /// \returns true if the input filename was in <>'s or false if it was 2219 /// in ""'s. 2220 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2221 2222 /// Given a "foo" or \<foo> reference, look up the indicated file. 2223 /// 2224 /// Returns None on failure. \p isAngled indicates whether the file 2225 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2226 Optional<FileEntryRef> 2227 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2228 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 2229 ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath, 2230 SmallVectorImpl<char> *RelativePath, 2231 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2232 bool *IsFrameworkFound, bool SkipCache = false); 2233 2234 /// Return true if we're in the top-level file, not in a \#include. 2235 bool isInPrimaryFile() const; 2236 2237 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2238 /// followed by EOD. 
Return true if the token is not a valid on-off-switch. 2239 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2240 2241 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2242 bool *ShadowFlag = nullptr); 2243 2244 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2245 Module *LeaveSubmodule(bool ForPragma); 2246 2247 private: 2248 friend void TokenLexer::ExpandFunctionArguments(); 2249 2250 void PushIncludeMacroStack() { 2251 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 2252 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 2253 std::move(CurLexer), CurPPLexer, 2254 std::move(CurTokenLexer), CurDirLookup); 2255 CurPPLexer = nullptr; 2256 } 2257 2258 void PopIncludeMacroStack() { 2259 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2260 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2261 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2262 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2263 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2264 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 2265 IncludeMacroStack.pop_back(); 2266 } 2267 2268 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2269 2270 /// Determine whether we need to create module macros for #defines in the 2271 /// current context. 2272 bool needModuleMacros() const; 2273 2274 /// Update the set of active module macros and ambiguity flag for a module 2275 /// macro name. 2276 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2277 2278 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2279 SourceLocation Loc); 2280 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2281 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2282 bool isPublic); 2283 2284 /// Lex and validate a macro name, which occurs after a 2285 /// \#define or \#undef. 
///
/// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag  Points to flag that is set if macro name shadows
///                    a keyword.
///
/// This emits a diagnostic, sets the token kind to eod,
/// and discards the rest of the macro line if the macro name is invalid.
void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
                   bool *ShadowFlag = nullptr);

/// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
/// entire line) of the macro's tokens and adds them to MacroInfo, and while
/// doing so performs certain validity checks including (but not limited to):
///   - # (stringization) is followed by a macro parameter
/// \param MacroNameTok - Token that represents the macro name
/// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
///
///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
///  returns a nullptr if an invalid sequence of tokens is encountered.
MacroInfo *ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);

/// The ( starting an argument list of a macro definition has just been read.
/// Lex the rest of the parameters and the closing ), updating \p MI with
/// what we learn and saving in \p LastTok the last token read.
/// Return true if an error occurs parsing the arg list.
bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);

/// Provide a suggestion for a typoed directive. If there is no typo, then
/// just skip suggesting.
///
/// \param Tok - Token that represents the directive
/// \param Directive - String reference for the directive name
void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;

/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.  If \p
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered. If
/// \p FoundElse is false, then \#else directives are ok, if not, then we have
/// already seen one so a \#else directive is a duplicate.  When this returns,
/// the caller can lex the first valid token.
void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                  SourceLocation IfTokenLoc,
                                  bool FoundNonSkipPortion, bool FoundElse,
                                  SourceLocation ElseLoc = SourceLocation());

/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
  /// Whether the expression was evaluated as true or not.
  bool Conditional;

  /// True if the expression contained identifiers that were undefined.
  bool IncludedUndefinedIds;

  /// The source range for the expression.
  SourceRange ExprRange;
};

/// Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);

/// Process a '__has_include("path")' expression.
///
/// Returns true if successful.
bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);

/// Process '__has_include_next("path")' expression.
///
/// Returns true if successful.
bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);

/// Get the directory and file from which to start \#include_next lookup.
std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token &IncludeNextTok) const;

/// Install the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();

/// Register builtin macros such as __LINE__ with the identifier table.
void RegisterBuiltinMacros();

/// If an identifier token is read that is to be expanded as a macro, handle
/// it and return the next token as 'Tok'.  If we lexed a token, return true;
/// otherwise the caller should lex again.
bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);

/// Cache macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
/// going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
                                ArrayRef<Token> tokens);

void removeCachedMacroExpandedTokensOfLastLexer();

/// Determine whether the next preprocessor token to be
/// lexed is a '('.  If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();

/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation.  Returns null on error.
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
                                     SourceLocation &MacroEnd);

/// If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
2402 void ExpandBuiltinMacro(Token &Tok); 2403 2404 /// Read a \c _Pragma directive, slice it up, process it, then 2405 /// return the first token after the directive. 2406 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2407 void Handle_Pragma(Token &Tok); 2408 2409 /// Like Handle_Pragma except the pragma text is not enclosed within 2410 /// a string literal. 2411 void HandleMicrosoft__pragma(Token &Tok); 2412 2413 /// Add a lexer to the top of the include stack and 2414 /// start lexing tokens from it instead of the current buffer. 2415 void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir); 2416 2417 /// Set the FileID for the preprocessor predefines. 2418 void setPredefinesFileID(FileID FID) { 2419 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2420 PredefinesFileID = FID; 2421 } 2422 2423 /// Set the FileID for the PCH through header. 2424 void setPCHThroughHeaderFileID(FileID FID); 2425 2426 /// Returns true if we are lexing from a file and not a 2427 /// pragma or a macro. 2428 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2429 return L ? !L->isPragmaLexer() : P != nullptr; 2430 } 2431 2432 static bool IsFileLexer(const IncludeStackInfo& I) { 2433 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2434 } 2435 2436 bool IsFileLexer() const { 2437 return IsFileLexer(CurLexer.get(), CurPPLexer); 2438 } 2439 2440 //===--------------------------------------------------------------------===// 2441 // Caching stuff. 2442 void CachingLex(Token &Result); 2443 2444 bool InCachingLexMode() const { 2445 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2446 // that we are past EOF, not that we are in CachingLex mode. 
2447 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2448 } 2449 2450 void EnterCachingLexMode(); 2451 void EnterCachingLexModeUnchecked(); 2452 2453 void ExitCachingLexMode() { 2454 if (InCachingLexMode()) 2455 RemoveTopOfLexerStack(); 2456 } 2457 2458 const Token &PeekAhead(unsigned N); 2459 void AnnotatePreviousCachedTokens(const Token &Tok); 2460 2461 //===--------------------------------------------------------------------===// 2462 /// Handle*Directive - implement the various preprocessor directives. These 2463 /// should side-effect the current preprocessor object so that the next call 2464 /// to Lex() will return the appropriate token next. 2465 void HandleLineDirective(); 2466 void HandleDigitDirective(Token &Tok); 2467 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2468 void HandleIdentSCCSDirective(Token &Tok); 2469 void HandleMacroPublicDirective(Token &Tok); 2470 void HandleMacroPrivateDirective(); 2471 2472 /// An additional notification that can be produced by a header inclusion or 2473 /// import to tell the parser what happened. 
struct ImportAction {
  enum ActionKind {
    None,
    ModuleBegin,
    ModuleImport,
    HeaderUnitImport,
    SkippedModuleImport,
    Failure,
  } Kind;
  Module *ModuleForHeader = nullptr;

  /// Every action kind other than None and Failure must be given a non-null
  /// module (checked by the assertion below).
  ImportAction(ActionKind AK, Module *Mod = nullptr)
      : Kind(AK), ModuleForHeader(Mod) {
    assert((AK == None || Mod || AK == Failure) &&
           "no module for module action");
  }
};

Optional<FileEntryRef> LookupHeaderIncludeOrImport(
    ConstSearchDirIterator *CurDir, StringRef &Filename,
    SourceLocation FilenameLoc, CharSourceRange FilenameRange,
    const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
    bool &IsMapped, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile, StringRef &LookupFilename,
    SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
    ModuleMap::KnownHeader &SuggestedModule, bool isAngled);

// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
                            ConstSearchDirIterator LookupFrom = nullptr,
                            const FileEntry *LookupFromFile = nullptr);
ImportAction
HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
                            Token &FilenameTok, SourceLocation EndLoc,
                            ConstSearchDirIterator LookupFrom = nullptr,
                            const FileEntry *LookupFromFile = nullptr);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);

public:
/// Check that the given module is available, producing a diagnostic if not.
/// \return \c true if the check failed (because the module is not available).
///         \c false if the module appears to be usable.
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
                                   const TargetInfo &TargetInfo,
                                   DiagnosticsEngine &Diags, Module *M);

// Module inclusion testing.

/// Find the module that owns the source or header file that
/// \p Loc points to. If the location is in a file that was included
/// into a module, or is outside any module, returns nullptr.
Module *getModuleForLocation(SourceLocation Loc);

/// We want to produce a diagnostic at location \p IncLoc concerning an
/// unreachable effect at location \p MLoc (e.g., where a desired entity was
/// declared or defined). Determine whether the right way to make \p MLoc
/// reachable is by #include, and if so, what header should be included.
///
/// This is not necessarily fast, and might load unexpected module maps, so
/// should only be called by code that intends to produce an error.
///
/// \param IncLoc The location at which the missing effect was detected.
/// \param MLoc A location within an unimported module at which the desired
///        effect occurred.
/// \return A file that can be #included to provide the desired effect. Null
///         if no such file could be determined or if a #include is not
///         appropriate (e.g., if a module should be imported instead).
const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                                  SourceLocation MLoc);

/// \return The result of PreambleConditionalStack.isRecording().
bool isRecordingPreamble() const {
  return PreambleConditionalStack.isRecording();
}

/// \return The result of PreambleConditionalStack.hasRecordedPreamble().
bool hasRecordedPreamble() const {
  return PreambleConditionalStack.hasRecordedPreamble();
}

/// \return The stack currently held by PreambleConditionalStack.
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
  return PreambleConditionalStack.getStack();
}

/// Install \p s as the stack held by PreambleConditionalStack.
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.setStack(s);
}

/// Put PreambleConditionalStack into replaying mode (startReplaying()), then
/// install \p s as its stack and store \p SkipInfo alongside it.
void setReplayablePreambleConditionalStack(
    ArrayRef<PPConditionalInfo> s,
    llvm::Optional<PreambleSkipInfo> SkipInfo) {
  PreambleConditionalStack.startReplaying();
  PreambleConditionalStack.setStack(s);
  PreambleConditionalStack.SkipInfo = SkipInfo;
}

/// \return The SkipInfo stored in PreambleConditionalStack, which may be
///         empty.
llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
  return PreambleConditionalStack.SkipInfo;
}

private:
/// After processing predefined file, initialize the conditional stack from
/// the preamble.
void replayPreambleConditionalStack();

// Macro handling.
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
void HandleUndefDirective();

// Conditional Inclusion.
void HandleIfdefDirective(Token &Result, const Token &HashToken,
                          bool isIfndef, bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(Token &IfToken, const Token &HashToken,
                       bool ReadAnyTokensBeforeDirective);
void HandleEndifDirective(Token &EndifToken);
void HandleElseDirective(Token &Result, const Token &HashToken);
void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
                               tok::PPKeywordKind Kind);

// Pragmas.
2593 void HandlePragmaDirective(PragmaIntroducer Introducer); 2594 2595 public: 2596 void HandlePragmaOnce(Token &OnceTok); 2597 void HandlePragmaMark(Token &MarkTok); 2598 void HandlePragmaPoison(); 2599 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2600 void HandlePragmaDependency(Token &DependencyTok); 2601 void HandlePragmaPushMacro(Token &Tok); 2602 void HandlePragmaPopMacro(Token &Tok); 2603 void HandlePragmaIncludeAlias(Token &Tok); 2604 void HandlePragmaModuleBuild(Token &Tok); 2605 void HandlePragmaHdrstop(Token &Tok); 2606 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2607 2608 // Return true and store the first token only if any CommentHandler 2609 // has inserted some tokens and getCommentRetentionState() is false. 2610 bool HandleComment(Token &result, SourceRange Comment); 2611 2612 /// A macro is used, update information about macros that need unused 2613 /// warnings. 2614 void markMacroAsUsed(MacroInfo *MI); 2615 2616 void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg, 2617 SourceLocation AnnotationLoc) { 2618 auto Annotations = AnnotationInfos.find(II); 2619 if (Annotations == AnnotationInfos.end()) 2620 AnnotationInfos.insert(std::make_pair( 2621 II, 2622 MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg)))); 2623 else 2624 Annotations->second.DeprecationInfo = 2625 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2626 } 2627 2628 void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg, 2629 SourceLocation AnnotationLoc) { 2630 auto Annotations = AnnotationInfos.find(II); 2631 if (Annotations == AnnotationInfos.end()) 2632 AnnotationInfos.insert( 2633 std::make_pair(II, MacroAnnotations::makeRestrictExpansion( 2634 AnnotationLoc, std::move(Msg)))); 2635 else 2636 Annotations->second.RestrictExpansionInfo = 2637 MacroAnnotationInfo{AnnotationLoc, std::move(Msg)}; 2638 } 2639 2640 void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) { 2641 auto Annotations = 
AnnotationInfos.find(II); 2642 if (Annotations == AnnotationInfos.end()) 2643 AnnotationInfos.insert( 2644 std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc))); 2645 else 2646 Annotations->second.FinalAnnotationLoc = AnnotationLoc; 2647 } 2648 2649 const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const { 2650 return AnnotationInfos.find(II)->second; 2651 } 2652 2653 void emitMacroExpansionWarnings(const Token &Identifier) const { 2654 if (Identifier.getIdentifierInfo()->isDeprecatedMacro()) 2655 emitMacroDeprecationWarning(Identifier); 2656 2657 if (Identifier.getIdentifierInfo()->isRestrictExpansion() && 2658 !SourceMgr.isInMainFile(Identifier.getLocation())) 2659 emitRestrictExpansionWarning(Identifier); 2660 } 2661 2662 static void processPathForFileMacro(SmallVectorImpl<char> &Path, 2663 const LangOptions &LangOpts, 2664 const TargetInfo &TI); 2665 2666 private: 2667 void emitMacroDeprecationWarning(const Token &Identifier) const; 2668 void emitRestrictExpansionWarning(const Token &Identifier) const; 2669 void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const; 2670 }; 2671 2672 /// Abstract base class that describes a handler that will receive 2673 /// source ranges for each of the comments encountered in the source file. 2674 class CommentHandler { 2675 public: 2676 virtual ~CommentHandler(); 2677 2678 // The handler shall return true if it has pushed any tokens 2679 // to be read using e.g. EnterToken or EnterTokenStream. 2680 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2681 }; 2682 2683 /// Abstract base class that describes a handler that will receive 2684 /// source ranges for empty lines encountered in the source file. 2685 class EmptylineHandler { 2686 public: 2687 virtual ~EmptylineHandler(); 2688 2689 // The handler handles empty lines. 
2690 virtual void HandleEmptyline(SourceRange Range) = 0; 2691 }; 2692 2693 /// Registry of pragma handlers added by plugins 2694 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 2695 2696 } // namespace clang 2697 2698 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 2699