1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/IdentifierTable.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/Basic/LangOptions.h" 21 #include "clang/Basic/Module.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/MacroInfo.h" 27 #include "clang/Lex/ModuleLoader.h" 28 #include "clang/Lex/ModuleMap.h" 29 #include "clang/Lex/PPCallbacks.h" 30 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" 31 #include "clang/Lex/Token.h" 32 #include "clang/Lex/TokenLexer.h" 33 #include "llvm/ADT/ArrayRef.h" 34 #include "llvm/ADT/DenseMap.h" 35 #include "llvm/ADT/FoldingSet.h" 36 #include "llvm/ADT/FunctionExtras.h" 37 #include "llvm/ADT/None.h" 38 #include "llvm/ADT/Optional.h" 39 #include "llvm/ADT/PointerUnion.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/SmallPtrSet.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/ADT/TinyPtrVector.h" 45 #include "llvm/ADT/iterator_range.h" 46 #include "llvm/Support/Allocator.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/Registry.h" 49 #include <cassert> 50 #include <cstddef> 51 #include <cstdint> 52 #include <map> 53 #include <memory> 54 #include <string> 55 #include <utility> 56 #include <vector> 57 58 namespace llvm { 59 60 template<unsigned InternalLen> class SmallString; 61 62 } // namespace llvm 63 64 namespace clang { 65 66 class CodeCompletionHandler; 67 class CommentHandler; 68 class DirectoryEntry; 69 class DirectoryLookup; 70 class EmptylineHandler; 71 class ExternalPreprocessorSource; 72 class FileEntry; 73 class FileManager; 74 class HeaderSearch; 75 class MacroArgs; 76 class PragmaHandler; 77 class PragmaNamespace; 78 class PreprocessingRecord; 79 class PreprocessorLexer; 80 class PreprocessorOptions; 81 class ScratchBuffer; 82 class TargetInfo; 83 84 namespace Builtin { 85 class Context; 86 } 87 88 /// Stores token information for comparing actual tokens with 89 /// predefined values. Only handles simple tokens and identifiers. 90 class TokenValue { 91 tok::TokenKind Kind; 92 IdentifierInfo *II; 93 94 public: TokenValue(tok::TokenKind Kind)95 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 96 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 97 assert(Kind != tok::identifier && 98 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 99 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 100 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 101 } 102 TokenValue(IdentifierInfo * II)103 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 104 105 bool operator==(const Token &Tok) const { 106 return Tok.getKind() == Kind && 107 (!II || II == Tok.getIdentifierInfo()); 108 } 109 }; 110 111 /// Context in which macro name is used. 112 enum MacroUse { 113 // other than #define or #undef 114 MU_Other = 0, 115 116 // macro name specified in #define 117 MU_Define = 1, 118 119 // macro name specified in #undef 120 MU_Undef = 2 121 }; 122 123 /// Engages in a tight little dance with the lexer to efficiently 124 /// preprocess tokens. 125 /// 126 /// Lexers know only about tokens within a single source file, and don't 127 /// know anything about preprocessor-level issues like the \#include stack, 128 /// token expansion, etc. 129 class Preprocessor { 130 friend class VAOptDefinitionContext; 131 friend class VariadicMacroScopeGuard; 132 133 llvm::unique_function<void(const clang::Token &)> OnToken; 134 std::shared_ptr<PreprocessorOptions> PPOpts; 135 DiagnosticsEngine *Diags; 136 LangOptions &LangOpts; 137 const TargetInfo *Target = nullptr; 138 const TargetInfo *AuxTarget = nullptr; 139 FileManager &FileMgr; 140 SourceManager &SourceMgr; 141 std::unique_ptr<ScratchBuffer> ScratchBuf; 142 HeaderSearch &HeaderInfo; 143 ModuleLoader &TheModuleLoader; 144 145 /// External source of macros. 146 ExternalPreprocessorSource *ExternalSource; 147 148 /// A BumpPtrAllocator object used to quickly allocate and release 149 /// objects internal to the Preprocessor. 150 llvm::BumpPtrAllocator BP; 151 152 /// Identifiers for builtin macros and other builtins. 153 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ 154 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ 155 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ 156 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ 157 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__ 158 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ 159 IdentifierInfo *Ident__COUNTER__; // __COUNTER__ 160 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma 161 IdentifierInfo *Ident__identifier; // __identifier 162 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__ 163 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__ 164 IdentifierInfo *Ident__has_feature; // __has_feature 165 IdentifierInfo *Ident__has_extension; // __has_extension 166 IdentifierInfo *Ident__has_builtin; // __has_builtin 167 IdentifierInfo *Ident__has_attribute; // __has_attribute 168 IdentifierInfo *Ident__has_include; // __has_include 169 IdentifierInfo *Ident__has_include_next; // __has_include_next 170 IdentifierInfo *Ident__has_warning; // __has_warning 171 IdentifierInfo *Ident__is_identifier; // __is_identifier 172 IdentifierInfo *Ident__building_module; // __building_module 173 IdentifierInfo *Ident__MODULE__; // __MODULE__ 174 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute 175 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute 176 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute 177 IdentifierInfo *Ident__is_target_arch; // __is_target_arch 178 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor 179 IdentifierInfo *Ident__is_target_os; // __is_target_os 180 IdentifierInfo *Ident__is_target_environment; // __is_target_environment 181 182 // Weak, only valid (and set) while InMacroArgs is true. 183 Token* ArgMacro; 184 185 SourceLocation DATELoc, TIMELoc; 186 187 // Next __COUNTER__ value, starts at 0. 188 unsigned CounterValue = 0; 189 190 enum { 191 /// Maximum depth of \#includes. 192 MaxAllowedIncludeStackDepth = 200 193 }; 194 195 // State that is set before the preprocessor begins. 196 bool KeepComments : 1; 197 bool KeepMacroComments : 1; 198 bool SuppressIncludeNotFoundError : 1; 199 200 // State that changes while the preprocessor runs: 201 bool InMacroArgs : 1; // True if parsing fn macro invocation args. 202 203 /// Whether the preprocessor owns the header search object. 204 bool OwnsHeaderSearch : 1; 205 206 /// True if macro expansion is disabled. 207 bool DisableMacroExpansion : 1; 208 209 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion) 210 /// when parsing preprocessor directives. 211 bool MacroExpansionInDirectivesOverride : 1; 212 213 class ResetMacroExpansionHelper; 214 215 /// Whether we have already loaded macros from the external source. 216 mutable bool ReadMacrosFromExternalSource : 1; 217 218 /// True if pragmas are enabled. 219 bool PragmasEnabled : 1; 220 221 /// True if the current build action is a preprocessing action. 222 bool PreprocessedOutput : 1; 223 224 /// True if we are currently preprocessing a #if or #elif directive 225 bool ParsingIfOrElifDirective; 226 227 /// True if we are pre-expanding macro arguments. 228 bool InMacroArgPreExpansion; 229 230 /// Mapping/lookup information for all identifiers in 231 /// the program, including program keywords. 232 mutable IdentifierTable Identifiers; 233 234 /// This table contains all the selectors in the program. 235 /// 236 /// Unlike IdentifierTable above, this table *isn't* populated by the 237 /// preprocessor. It is declared/expanded here because its role/lifetime is 238 /// conceptually similar to the IdentifierTable. In addition, the current 239 /// control flow (in clang::ParseAST()), make it convenient to put here. 240 /// 241 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to 242 /// the lifetime of the preprocessor. 243 SelectorTable Selectors; 244 245 /// Information about builtins. 246 std::unique_ptr<Builtin::Context> BuiltinInfo; 247 248 /// Tracks all of the pragmas that the client registered 249 /// with this preprocessor. 250 std::unique_ptr<PragmaNamespace> PragmaHandlers; 251 252 /// Pragma handlers of the original source is stored here during the 253 /// parsing of a model file. 254 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup; 255 256 /// Tracks all of the comment handlers that the client registered 257 /// with this preprocessor. 258 std::vector<CommentHandler *> CommentHandlers; 259 260 /// Empty line handler. 261 EmptylineHandler *Emptyline = nullptr; 262 263 /// True if we want to ignore EOF token and continue later on (thus 264 /// avoid tearing the Lexer and etc. down). 265 bool IncrementalProcessing = false; 266 267 /// The kind of translation unit we are processing. 268 TranslationUnitKind TUKind; 269 270 /// The code-completion handler. 271 CodeCompletionHandler *CodeComplete = nullptr; 272 273 /// The file that we're performing code-completion for, if any. 274 const FileEntry *CodeCompletionFile = nullptr; 275 276 /// The offset in file for the code-completion point. 277 unsigned CodeCompletionOffset = 0; 278 279 /// The location for the code-completion point. This gets instantiated 280 /// when the CodeCompletionFile gets \#include'ed for preprocessing. 281 SourceLocation CodeCompletionLoc; 282 283 /// The start location for the file of the code-completion point. 284 /// 285 /// This gets instantiated when the CodeCompletionFile gets \#include'ed 286 /// for preprocessing. 287 SourceLocation CodeCompletionFileLoc; 288 289 /// The source location of the \c import contextual keyword we just 290 /// lexed, if any. 291 SourceLocation ModuleImportLoc; 292 293 /// The module import path that we're currently processing. 294 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath; 295 296 /// Whether the last token we lexed was an '@'. 297 bool LastTokenWasAt = false; 298 299 /// A position within a C++20 import-seq. 300 class ImportSeq { 301 public: 302 enum State : int { 303 // Positive values represent a number of unclosed brackets. 304 AtTopLevel = 0, 305 AfterTopLevelTokenSeq = -1, 306 AfterExport = -2, 307 AfterImportSeq = -3, 308 }; 309 ImportSeq(State S)310 ImportSeq(State S) : S(S) {} 311 312 /// Saw any kind of open bracket. handleOpenBracket()313 void handleOpenBracket() { 314 S = static_cast<State>(std::max<int>(S, 0) + 1); 315 } 316 /// Saw any kind of close bracket other than '}'. handleCloseBracket()317 void handleCloseBracket() { 318 S = static_cast<State>(std::max<int>(S, 1) - 1); 319 } 320 /// Saw a close brace. handleCloseBrace()321 void handleCloseBrace() { 322 handleCloseBracket(); 323 if (S == AtTopLevel && !AfterHeaderName) 324 S = AfterTopLevelTokenSeq; 325 } 326 /// Saw a semicolon. handleSemi()327 void handleSemi() { 328 if (atTopLevel()) { 329 S = AfterTopLevelTokenSeq; 330 AfterHeaderName = false; 331 } 332 } 333 334 /// Saw an 'export' identifier. handleExport()335 void handleExport() { 336 if (S == AfterTopLevelTokenSeq) 337 S = AfterExport; 338 else if (S <= 0) 339 S = AtTopLevel; 340 } 341 /// Saw an 'import' identifier. handleImport()342 void handleImport() { 343 if (S == AfterTopLevelTokenSeq || S == AfterExport) 344 S = AfterImportSeq; 345 else if (S <= 0) 346 S = AtTopLevel; 347 } 348 349 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 350 /// until we reach a top-level semicolon. handleHeaderName()351 void handleHeaderName() { 352 if (S == AfterImportSeq) 353 AfterHeaderName = true; 354 handleMisc(); 355 } 356 357 /// Saw any other token. handleMisc()358 void handleMisc() { 359 if (S <= 0) 360 S = AtTopLevel; 361 } 362 atTopLevel()363 bool atTopLevel() { return S <= 0; } afterImportSeq()364 bool afterImportSeq() { return S == AfterImportSeq; } 365 366 private: 367 State S; 368 /// Whether we're in the pp-import-suffix following the header-name in a 369 /// pp-import. If so, a close-brace is not sufficient to end the 370 /// top-level-token-seq of an import-seq. 371 bool AfterHeaderName = false; 372 }; 373 374 /// Our current position within a C++20 import-seq. 375 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq; 376 377 /// Whether the module import expects an identifier next. Otherwise, 378 /// it expects a '.' or ';'. 379 bool ModuleImportExpectsIdentifier = false; 380 381 /// The identifier and source location of the currently-active 382 /// \#pragma clang arc_cf_code_audited begin. 383 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 384 385 /// The source location of the currently-active 386 /// \#pragma clang assume_nonnull begin. 387 SourceLocation PragmaAssumeNonNullLoc; 388 389 /// True if we hit the code-completion point. 390 bool CodeCompletionReached = false; 391 392 /// The code completion token containing the information 393 /// on the stem that is to be code completed. 394 IdentifierInfo *CodeCompletionII = nullptr; 395 396 /// Range for the code completion token. 397 SourceRange CodeCompletionTokenRange; 398 399 /// The directory that the main file should be considered to occupy, 400 /// if it does not correspond to a real file (as happens when building a 401 /// module). 402 const DirectoryEntry *MainFileDir = nullptr; 403 404 /// The number of bytes that we will initially skip when entering the 405 /// main file, along with a flag that indicates whether skipping this number 406 /// of bytes will place the lexer at the start of a line. 407 /// 408 /// This is used when loading a precompiled preamble. 409 std::pair<int, bool> SkipMainFilePreamble; 410 411 /// Whether we hit an error due to reaching max allowed include depth. Allows 412 /// to avoid hitting the same error over and over again. 413 bool HasReachedMaxIncludeDepth = false; 414 415 /// The number of currently-active calls to Lex. 416 /// 417 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 418 /// require asking for multiple additional tokens. This counter makes it 419 /// possible for Lex to detect whether it's producing a token for the end 420 /// of phase 4 of translation or for some other situation. 421 unsigned LexLevel = 0; 422 423 /// The number of (LexLevel 0) preprocessor tokens. 424 unsigned TokenCount = 0; 425 426 /// Preprocess every token regardless of LexLevel. 427 bool PreprocessToken = false; 428 429 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 430 /// warning, or zero for unlimited. 431 unsigned MaxTokens = 0; 432 SourceLocation MaxTokensOverrideLoc; 433 434 public: 435 struct PreambleSkipInfo { 436 SourceLocation HashTokenLoc; 437 SourceLocation IfTokenLoc; 438 bool FoundNonSkipPortion; 439 bool FoundElse; 440 SourceLocation ElseLoc; 441 PreambleSkipInfoPreambleSkipInfo442 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 443 bool FoundNonSkipPortion, bool FoundElse, 444 SourceLocation ElseLoc) 445 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 446 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 447 ElseLoc(ElseLoc) {} 448 }; 449 450 private: 451 friend class ASTReader; 452 friend class MacroArgs; 453 454 class PreambleConditionalStackStore { 455 enum State { 456 Off = 0, 457 Recording = 1, 458 Replaying = 2, 459 }; 460 461 public: 462 PreambleConditionalStackStore() = default; 463 startRecording()464 void startRecording() { ConditionalStackState = Recording; } startReplaying()465 void startReplaying() { ConditionalStackState = Replaying; } isRecording()466 bool isRecording() const { return ConditionalStackState == Recording; } isReplaying()467 bool isReplaying() const { return ConditionalStackState == Replaying; } 468 getStack()469 ArrayRef<PPConditionalInfo> getStack() const { 470 return ConditionalStack; 471 } 472 doneReplaying()473 void doneReplaying() { 474 ConditionalStack.clear(); 475 ConditionalStackState = Off; 476 } 477 setStack(ArrayRef<PPConditionalInfo> s)478 void setStack(ArrayRef<PPConditionalInfo> s) { 479 if (!isRecording() && !isReplaying()) 480 return; 481 ConditionalStack.clear(); 482 ConditionalStack.append(s.begin(), s.end()); 483 } 484 hasRecordedPreamble()485 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 486 reachedEOFWhileSkipping()487 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); } 488 clearSkipInfo()489 void clearSkipInfo() { SkipInfo.reset(); } 490 491 llvm::Optional<PreambleSkipInfo> SkipInfo; 492 493 private: 494 SmallVector<PPConditionalInfo, 4> ConditionalStack; 495 State ConditionalStackState = Off; 496 } PreambleConditionalStack; 497 498 /// The current top of the stack that we're lexing from if 499 /// not expanding a macro and we are lexing directly from source code. 500 /// 501 /// Only one of CurLexer, or CurTokenLexer will be non-null. 502 std::unique_ptr<Lexer> CurLexer; 503 504 /// The current top of the stack what we're lexing from 505 /// if not expanding a macro. 506 /// 507 /// This is an alias for CurLexer. 508 PreprocessorLexer *CurPPLexer = nullptr; 509 510 /// Used to find the current FileEntry, if CurLexer is non-null 511 /// and if applicable. 512 /// 513 /// This allows us to implement \#include_next and find directory-specific 514 /// properties. 515 const DirectoryLookup *CurDirLookup = nullptr; 516 517 /// The current macro we are expanding, if we are expanding a macro. 518 /// 519 /// One of CurLexer and CurTokenLexer must be null. 520 std::unique_ptr<TokenLexer> CurTokenLexer; 521 522 /// The kind of lexer we're currently working with. 523 enum CurLexerKind { 524 CLK_Lexer, 525 CLK_TokenLexer, 526 CLK_CachingLexer, 527 CLK_LexAfterModuleImport 528 } CurLexerKind = CLK_Lexer; 529 530 /// If the current lexer is for a submodule that is being built, this 531 /// is that submodule. 532 Module *CurLexerSubmodule = nullptr; 533 534 /// Keeps track of the stack of files currently 535 /// \#included, and macros currently being expanded from, not counting 536 /// CurLexer/CurTokenLexer. 537 struct IncludeStackInfo { 538 enum CurLexerKind CurLexerKind; 539 Module *TheSubmodule; 540 std::unique_ptr<Lexer> TheLexer; 541 PreprocessorLexer *ThePPLexer; 542 std::unique_ptr<TokenLexer> TheTokenLexer; 543 const DirectoryLookup *TheDirLookup; 544 545 // The following constructors are completely useless copies of the default 546 // versions, only needed to pacify MSVC. IncludeStackInfoIncludeStackInfo547 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 548 std::unique_ptr<Lexer> &&TheLexer, 549 PreprocessorLexer *ThePPLexer, 550 std::unique_ptr<TokenLexer> &&TheTokenLexer, 551 const DirectoryLookup *TheDirLookup) 552 : CurLexerKind(std::move(CurLexerKind)), 553 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 554 ThePPLexer(std::move(ThePPLexer)), 555 TheTokenLexer(std::move(TheTokenLexer)), 556 TheDirLookup(std::move(TheDirLookup)) {} 557 }; 558 std::vector<IncludeStackInfo> IncludeMacroStack; 559 560 /// Actions invoked when some preprocessor activity is 561 /// encountered (e.g. a file is \#included, etc). 562 std::unique_ptr<PPCallbacks> Callbacks; 563 564 struct MacroExpandsInfo { 565 Token Tok; 566 MacroDefinition MD; 567 SourceRange Range; 568 MacroExpandsInfoMacroExpandsInfo569 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 570 : Tok(Tok), MD(MD), Range(Range) {} 571 }; 572 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 573 574 /// Information about a name that has been used to define a module macro. 575 struct ModuleMacroInfo { 576 /// The most recent macro directive for this identifier. 577 MacroDirective *MD; 578 579 /// The active module macros for this identifier. 580 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 581 582 /// The generation number at which we last updated ActiveModuleMacros. 583 /// \see Preprocessor::VisibleModules. 584 unsigned ActiveModuleMacrosGeneration = 0; 585 586 /// Whether this macro name is ambiguous. 587 bool IsAmbiguous = false; 588 589 /// The module macros that are overridden by this macro. 590 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 591 ModuleMacroInfoModuleMacroInfo592 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 593 }; 594 595 /// The state of a macro for an identifier. 596 class MacroState { 597 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 598 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)599 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 600 const IdentifierInfo *II) const { 601 if (II->isOutOfDate()) 602 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 603 // FIXME: Find a spare bit on IdentifierInfo and store a 604 // HasModuleMacros flag. 605 if (!II->hasMacroDefinition() || 606 (!PP.getLangOpts().Modules && 607 !PP.getLangOpts().ModulesLocalVisibility) || 608 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 609 return nullptr; 610 611 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 612 if (!Info) { 613 Info = new (PP.getPreprocessorAllocator()) 614 ModuleMacroInfo(State.get<MacroDirective *>()); 615 State = Info; 616 } 617 618 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 619 Info->ActiveModuleMacrosGeneration) 620 PP.updateModuleMacroInfo(II, *Info); 621 return Info; 622 } 623 624 public: MacroState()625 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)626 MacroState(MacroDirective *MD) : State(MD) {} 627 MacroState(MacroState && O)628 MacroState(MacroState &&O) noexcept : State(O.State) { 629 O.State = (MacroDirective *)nullptr; 630 } 631 632 MacroState &operator=(MacroState &&O) noexcept { 633 auto S = O.State; 634 O.State = (MacroDirective *)nullptr; 635 State = S; 636 return *this; 637 } 638 ~MacroState()639 ~MacroState() { 640 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 641 Info->~ModuleMacroInfo(); 642 } 643 getLatest()644 MacroDirective *getLatest() const { 645 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 646 return Info->MD; 647 return State.get<MacroDirective*>(); 648 } 649 setLatest(MacroDirective * MD)650 void setLatest(MacroDirective *MD) { 651 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 652 Info->MD = MD; 653 else 654 State = MD; 655 } 656 isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)657 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 658 auto *Info = getModuleInfo(PP, II); 659 return Info ? Info->IsAmbiguous : false; 660 } 661 662 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)663 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 664 if (auto *Info = getModuleInfo(PP, II)) 665 return Info->ActiveModuleMacros; 666 return None; 667 } 668 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)669 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 670 SourceManager &SourceMgr) const { 671 // FIXME: Incorporate module macros into the result of this. 672 if (auto *Latest = getLatest()) 673 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 674 return {}; 675 } 676 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)677 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 678 if (auto *Info = getModuleInfo(PP, II)) { 679 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 680 Info->ActiveModuleMacros.begin(), 681 Info->ActiveModuleMacros.end()); 682 Info->ActiveModuleMacros.clear(); 683 Info->IsAmbiguous = false; 684 } 685 } 686 getOverriddenMacros()687 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 688 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 689 return Info->OverriddenMacros; 690 return None; 691 } 692 setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)693 void setOverriddenMacros(Preprocessor &PP, 694 ArrayRef<ModuleMacro *> Overrides) { 695 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 696 if (!Info) { 697 if (Overrides.empty()) 698 return; 699 Info = new (PP.getPreprocessorAllocator()) 700 ModuleMacroInfo(State.get<MacroDirective *>()); 701 State = Info; 702 } 703 Info->OverriddenMacros.clear(); 704 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 705 Overrides.begin(), Overrides.end()); 706 Info->ActiveModuleMacrosGeneration = 0; 707 } 708 }; 709 710 /// For each IdentifierInfo that was associated with a macro, we 711 /// keep a mapping to the history of all macro definitions and #undefs in 712 /// the reverse order (the latest one is in the head of the list). 713 /// 714 /// This mapping lives within the \p CurSubmoduleState. 715 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 716 717 struct SubmoduleState; 718 719 /// Information about a submodule that we're currently building. 720 struct BuildingSubmoduleInfo { 721 /// The module that we are building. 722 Module *M; 723 724 /// The location at which the module was included. 725 SourceLocation ImportLoc; 726 727 /// Whether we entered this submodule via a pragma. 728 bool IsPragma; 729 730 /// The previous SubmoduleState. 731 SubmoduleState *OuterSubmoduleState; 732 733 /// The number of pending module macro names when we started building this. 734 unsigned OuterPendingModuleMacroNames; 735 BuildingSubmoduleInfoBuildingSubmoduleInfo736 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 737 SubmoduleState *OuterSubmoduleState, 738 unsigned OuterPendingModuleMacroNames) 739 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 740 OuterSubmoduleState(OuterSubmoduleState), 741 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 742 }; 743 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 744 745 /// Information about a submodule's preprocessor state. 746 struct SubmoduleState { 747 /// The macros for the submodule. 748 MacroMap Macros; 749 750 /// The set of modules that are visible within the submodule. 751 VisibleModuleSet VisibleModules; 752 753 // FIXME: CounterValue? 754 // FIXME: PragmaPushMacroInfo? 755 }; 756 std::map<Module *, SubmoduleState> Submodules; 757 758 /// The preprocessor state for preprocessing outside of any submodule. 759 SubmoduleState NullSubmoduleState; 760 761 /// The current submodule state. Will be \p NullSubmoduleState if we're not 762 /// in a submodule. 763 SubmoduleState *CurSubmoduleState; 764 765 /// The set of known macros exported from modules. 766 llvm::FoldingSet<ModuleMacro> ModuleMacros; 767 768 /// The names of potential module macros that we've not yet processed. 769 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames; 770 771 /// The list of module macros, for each identifier, that are not overridden by 772 /// any other module macro. 773 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>> 774 LeafModuleMacros; 775 776 /// Macros that we want to warn because they are not used at the end 777 /// of the translation unit. 778 /// 779 /// We store just their SourceLocations instead of 780 /// something like MacroInfo*. The benefit of this is that when we are 781 /// deserializing from PCH, we don't need to deserialize identifier & macros 782 /// just so that we can report that they are unused, we just warn using 783 /// the SourceLocations of this set (that will be filled by the ASTReader). 784 /// We are using SmallPtrSet instead of a vector for faster removal. 785 using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>; 786 WarnUnusedMacroLocsTy WarnUnusedMacroLocs; 787 788 /// A "freelist" of MacroArg objects that can be 789 /// reused for quick allocation. 790 MacroArgs *MacroArgCache = nullptr; 791 792 /// For each IdentifierInfo used in a \#pragma push_macro directive, 793 /// we keep a MacroInfo stack used to restore the previous macro value. 794 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>> 795 PragmaPushMacroInfo; 796 797 // Various statistics we track for performance analysis. 798 unsigned NumDirectives = 0; 799 unsigned NumDefined = 0; 800 unsigned NumUndefined = 0; 801 unsigned NumPragma = 0; 802 unsigned NumIf = 0; 803 unsigned NumElse = 0; 804 unsigned NumEndif = 0; 805 unsigned NumEnteredSourceFiles = 0; 806 unsigned MaxIncludeStackDepth = 0; 807 unsigned NumMacroExpanded = 0; 808 unsigned NumFnMacroExpanded = 0; 809 unsigned NumBuiltinMacroExpanded = 0; 810 unsigned NumFastMacroExpanded = 0; 811 unsigned NumTokenPaste = 0; 812 unsigned NumFastTokenPaste = 0; 813 unsigned NumSkipped = 0; 814 815 /// The predefined macros that preprocessor should use from the 816 /// command line etc. 817 std::string Predefines; 818 819 /// The file ID for the preprocessor predefines. 820 FileID PredefinesFileID; 821 822 /// The file ID for the PCH through header. 823 FileID PCHThroughHeaderFileID; 824 825 /// Whether tokens are being skipped until a #pragma hdrstop is seen. 826 bool SkippingUntilPragmaHdrStop = false; 827 828 /// Whether tokens are being skipped until the through header is seen. 829 bool SkippingUntilPCHThroughHeader = false; 830 831 /// \{ 832 /// Cache of macro expanders to reduce malloc traffic. 833 enum { TokenLexerCacheSize = 8 }; 834 unsigned NumCachedTokenLexers; 835 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize]; 836 /// \} 837 838 /// Keeps macro expanded tokens for TokenLexers. 839 // 840 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 841 /// going to lex in the cache and when it finishes the tokens are removed 842 /// from the end of the cache. 843 SmallVector<Token, 16> MacroExpandedTokens; 844 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack; 845 846 /// A record of the macro definitions and expansions that 847 /// occurred during preprocessing. 848 /// 849 /// This is an optional side structure that can be enabled with 850 /// \c createPreprocessingRecord() prior to preprocessing. 851 PreprocessingRecord *Record = nullptr; 852 853 /// Cached tokens state. 854 using CachedTokensTy = SmallVector<Token, 1>; 855 856 /// Cached tokens are stored here when we do backtracking or 857 /// lookahead. They are "lexed" by the CachingLex() method. 858 CachedTokensTy CachedTokens; 859 860 /// The position of the cached token that CachingLex() should 861 /// "lex" next. 862 /// 863 /// If it points beyond the CachedTokens vector, it means that a normal 864 /// Lex() should be invoked. 865 CachedTokensTy::size_type CachedLexPos = 0; 866 867 /// Stack of backtrack positions, allowing nested backtracks. 868 /// 869 /// The EnableBacktrackAtThisPos() method pushes a position to 870 /// indicate where CachedLexPos should be set when the BackTrack() method is 871 /// invoked (at which point the last position is popped). 872 std::vector<CachedTokensTy::size_type> BacktrackPositions; 873 874 struct MacroInfoChain { 875 MacroInfo MI; 876 MacroInfoChain *Next; 877 }; 878 879 /// MacroInfos are managed as a chain for easy disposal. This is the head 880 /// of that list. 881 MacroInfoChain *MIChainHead = nullptr; 882 883 void updateOutOfDateIdentifier(IdentifierInfo &II) const; 884 885 public: 886 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts, 887 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM, 888 HeaderSearch &Headers, ModuleLoader &TheModuleLoader, 889 IdentifierInfoLookup *IILookup = nullptr, 890 bool OwnsHeaderSearch = false, 891 TranslationUnitKind TUKind = TU_Complete); 892 893 ~Preprocessor(); 894 895 /// Initialize the preprocessor using information about the target. 896 /// 897 /// \param Target is owned by the caller and must remain valid for the 898 /// lifetime of the preprocessor. 899 /// \param AuxTarget is owned by the caller and must remain valid for 900 /// the lifetime of the preprocessor. 901 void Initialize(const TargetInfo &Target, 902 const TargetInfo *AuxTarget = nullptr); 903 904 /// Initialize the preprocessor to parse a model file 905 /// 906 /// To parse model files the preprocessor of the original source is reused to 907 /// preserver the identifier table. However to avoid some duplicate 908 /// information in the preprocessor some cleanup is needed before it is used 909 /// to parse model files. This method does that cleanup. 910 void InitializeForModelFile(); 911 912 /// Cleanup after model file parsing 913 void FinalizeForModelFile(); 914 915 /// Retrieve the preprocessor options used to initialize this 916 /// preprocessor. getPreprocessorOpts()917 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 918 getDiagnostics()919 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)920 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 921 getLangOpts()922 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()923 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()924 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()925 FileManager &getFileManager() const { return FileMgr; } getSourceManager()926 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()927 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 928 getIdentifierTable()929 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()930 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()931 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()932 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } getPreprocessorAllocator()933 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 934 setExternalSource(ExternalPreprocessorSource * Source)935 void setExternalSource(ExternalPreprocessorSource *Source) { 936 ExternalSource = Source; 937 } 938 getExternalSource()939 ExternalPreprocessorSource *getExternalSource() const { 940 return ExternalSource; 941 } 942 943 /// Retrieve the module loader associated with this preprocessor. getModuleLoader()944 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 945 hadModuleLoaderFatalFailure()946 bool hadModuleLoaderFatalFailure() const { 947 return TheModuleLoader.HadFatalFailure; 948 } 949 950 /// Retrieve the number of Directives that have been processed by the 951 /// Preprocessor. getNumDirectives()952 unsigned getNumDirectives() const { 953 return NumDirectives; 954 } 955 956 /// True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()957 bool isParsingIfOrElifDirective() const { 958 return ParsingIfOrElifDirective; 959 } 960 961 /// Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)962 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 963 this->KeepComments = KeepComments | KeepMacroComments; 964 this->KeepMacroComments = KeepMacroComments; 965 } 966 getCommentRetentionState()967 bool getCommentRetentionState() const { return KeepComments; } 968 setPragmasEnabled(bool Enabled)969 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()970 bool getPragmasEnabled() const { return PragmasEnabled; } 971 SetSuppressIncludeNotFoundError(bool Suppress)972 void SetSuppressIncludeNotFoundError(bool Suppress) { 973 SuppressIncludeNotFoundError = Suppress; 974 } 975 GetSuppressIncludeNotFoundError()976 bool GetSuppressIncludeNotFoundError() { 977 return SuppressIncludeNotFoundError; 978 } 979 980 /// Sets whether the preprocessor is responsible for producing output or if 981 /// it is producing tokens to be consumed by Parse and Sema. setPreprocessedOutput(bool IsPreprocessedOutput)982 void setPreprocessedOutput(bool IsPreprocessedOutput) { 983 PreprocessedOutput = IsPreprocessedOutput; 984 } 985 986 /// Returns true if the preprocessor is responsible for generating output, 987 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()988 bool isPreprocessedOutput() const { return PreprocessedOutput; } 989 990 /// Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)991 bool isCurrentLexer(const PreprocessorLexer *L) const { 992 return CurPPLexer == L; 993 } 994 995 /// Return the current lexer being lexed from. 996 /// 997 /// Note that this ignores any potentially active macro expansions and _Pragma 998 /// expansions going on at the time. getCurrentLexer()999 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 1000 1001 /// Return the current file lexer being lexed from. 1002 /// 1003 /// Note that this ignores any potentially active macro expansions and _Pragma 1004 /// expansions going on at the time. 1005 PreprocessorLexer *getCurrentFileLexer() const; 1006 1007 /// Return the submodule owning the file being lexed. This may not be 1008 /// the current module if we have changed modules since entering the file. getCurrentLexerSubmodule()1009 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1010 1011 /// Returns the FileID for the preprocessor predefines. getPredefinesFileID()1012 FileID getPredefinesFileID() const { return PredefinesFileID; } 1013 1014 /// \{ 1015 /// Accessors for preprocessor callbacks. 1016 /// 1017 /// Note that this class takes ownership of any PPCallbacks object given to 1018 /// it. getPPCallbacks()1019 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)1020 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1021 if (Callbacks) 1022 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1023 std::move(Callbacks)); 1024 Callbacks = std::move(C); 1025 } 1026 /// \} 1027 1028 /// Get the number of tokens processed so far. getTokenCount()1029 unsigned getTokenCount() const { return TokenCount; } 1030 1031 /// Get the max number of tokens before issuing a -Wmax-tokens warning. getMaxTokens()1032 unsigned getMaxTokens() const { return MaxTokens; } 1033 overrideMaxTokens(unsigned Value,SourceLocation Loc)1034 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1035 MaxTokens = Value; 1036 MaxTokensOverrideLoc = Loc; 1037 }; 1038 getMaxTokensOverrideLoc()1039 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1040 1041 /// Register a function that would be called on each token in the final 1042 /// expanded token stream. 1043 /// This also reports annotation tokens produced by the parser. setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1044 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1045 OnToken = std::move(F); 1046 } 1047 setPreprocessToken(bool Preprocess)1048 void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; } 1049 isMacroDefined(StringRef Id)1050 bool isMacroDefined(StringRef Id) { 1051 return isMacroDefined(&Identifiers.get(Id)); 1052 } isMacroDefined(const IdentifierInfo * II)1053 bool isMacroDefined(const IdentifierInfo *II) { 1054 return II->hasMacroDefinition() && 1055 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1056 } 1057 1058 /// Determine whether II is defined as a macro within the module M, 1059 /// if that is a module that we've already preprocessed. Does not check for 1060 /// macros imported into M. isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1061 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1062 if (!II->hasMacroDefinition()) 1063 return false; 1064 auto I = Submodules.find(M); 1065 if (I == Submodules.end()) 1066 return false; 1067 auto J = I->second.Macros.find(II); 1068 if (J == I->second.Macros.end()) 1069 return false; 1070 auto *MD = J->second.getLatest(); 1071 return MD && MD->isDefined(); 1072 } 1073 getMacroDefinition(const IdentifierInfo * II)1074 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1075 if (!II->hasMacroDefinition()) 1076 return {}; 1077 1078 MacroState &S = CurSubmoduleState->Macros[II]; 1079 auto *MD = S.getLatest(); 1080 while (MD && isa<VisibilityMacroDirective>(MD)) 1081 MD = MD->getPrevious(); 1082 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1083 S.getActiveModuleMacros(*this, II), 1084 S.isAmbiguous(*this, II)); 1085 } 1086 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1087 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1088 SourceLocation Loc) { 1089 if (!II->hadMacroDefinition()) 1090 return {}; 1091 1092 MacroState &S = CurSubmoduleState->Macros[II]; 1093 MacroDirective::DefInfo DI; 1094 if (auto *MD = S.getLatest()) 1095 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1096 // FIXME: Compute the set of active module macros at the specified location. 1097 return MacroDefinition(DI.getDirective(), 1098 S.getActiveModuleMacros(*this, II), 1099 S.isAmbiguous(*this, II)); 1100 } 1101 1102 /// Given an identifier, return its latest non-imported MacroDirective 1103 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. getLocalMacroDirective(const IdentifierInfo * II)1104 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1105 if (!II->hasMacroDefinition()) 1106 return nullptr; 1107 1108 auto *MD = getLocalMacroDirectiveHistory(II); 1109 if (!MD || MD->getDefinition().isUndefined()) 1110 return nullptr; 1111 1112 return MD; 1113 } 1114 getMacroInfo(const IdentifierInfo * II)1115 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1116 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1117 } 1118 getMacroInfo(const IdentifierInfo * II)1119 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1120 if (!II->hasMacroDefinition()) 1121 return nullptr; 1122 if (auto MD = getMacroDefinition(II)) 1123 return MD.getMacroInfo(); 1124 return nullptr; 1125 } 1126 1127 /// Given an identifier, return the latest non-imported macro 1128 /// directive for that identifier. 1129 /// 1130 /// One can iterate over all previous macro directives from the most recent 1131 /// one. 1132 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1133 1134 /// Add a directive to the macro directive history for this identifier. 1135 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1136 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1137 SourceLocation Loc) { 1138 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1139 appendMacroDirective(II, MD); 1140 return MD; 1141 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1142 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1143 MacroInfo *MI) { 1144 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1145 } 1146 1147 /// Set a MacroDirective that was loaded from a PCH file. 1148 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1149 MacroDirective *MD); 1150 1151 /// Register an exported macro for a module and identifier. 1152 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 1153 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1154 ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II); 1155 1156 /// Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)1157 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1158 if (II->isOutOfDate()) 1159 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 1160 auto I = LeafModuleMacros.find(II); 1161 if (I != LeafModuleMacros.end()) 1162 return I->second; 1163 return None; 1164 } 1165 1166 /// Get the list of submodules that we're currently building. getBuildingSubmodules()1167 ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const { 1168 return BuildingSubmoduleStack; 1169 } 1170 1171 /// \{ 1172 /// Iterators for the macro history table. Currently defined macros have 1173 /// IdentifierInfo::hasMacroDefinition() set and an empty 1174 /// MacroInfo::getUndefLoc() at the head of the list. 1175 using macro_iterator = MacroMap::const_iterator; 1176 1177 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1178 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1179 1180 llvm::iterator_range<macro_iterator> 1181 macros(bool IncludeExternalMacros = true) const { 1182 macro_iterator begin = macro_begin(IncludeExternalMacros); 1183 macro_iterator end = macro_end(IncludeExternalMacros); 1184 return llvm::make_range(begin, end); 1185 } 1186 1187 /// \} 1188 1189 /// Return the name of the macro defined before \p Loc that has 1190 /// spelling \p Tokens. If there are multiple macros with same spelling, 1191 /// return the last one defined. 1192 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1193 ArrayRef<TokenValue> Tokens) const; 1194 getPredefines()1195 const std::string &getPredefines() const { return Predefines; } 1196 1197 /// Set the predefines for this Preprocessor. 1198 /// 1199 /// These predefines are automatically injected when parsing the main file. setPredefines(const char * P)1200 void setPredefines(const char *P) { Predefines = P; } setPredefines(StringRef P)1201 void setPredefines(StringRef P) { Predefines = std::string(P); } 1202 1203 /// Return information about the specified preprocessor 1204 /// identifier token. getIdentifierInfo(StringRef Name)1205 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1206 return &Identifiers.get(Name); 1207 } 1208 1209 /// Add the specified pragma handler to this preprocessor. 1210 /// 1211 /// If \p Namespace is non-null, then it is a token required to exist on the 1212 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1213 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)1214 void AddPragmaHandler(PragmaHandler *Handler) { 1215 AddPragmaHandler(StringRef(), Handler); 1216 } 1217 1218 /// Remove the specific pragma handler from this preprocessor. 1219 /// 1220 /// If \p Namespace is non-null, then it should be the namespace that 1221 /// \p Handler was added to. It is an error to remove a handler that 1222 /// has not been registered. 1223 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)1224 void RemovePragmaHandler(PragmaHandler *Handler) { 1225 RemovePragmaHandler(StringRef(), Handler); 1226 } 1227 1228 /// Install empty handlers for all pragmas (making them ignored). 1229 void IgnorePragmas(); 1230 1231 /// Set empty line handler. setEmptylineHandler(EmptylineHandler * Handler)1232 void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } 1233 getEmptylineHandler()1234 EmptylineHandler *getEmptylineHandler() const { return Emptyline; } 1235 1236 /// Add the specified comment handler to the preprocessor. 1237 void addCommentHandler(CommentHandler *Handler); 1238 1239 /// Remove the specified comment handler. 1240 /// 1241 /// It is an error to remove a handler that has not been registered. 1242 void removeCommentHandler(CommentHandler *Handler); 1243 1244 /// Set the code completion handler to the given object. setCodeCompletionHandler(CodeCompletionHandler & Handler)1245 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1246 CodeComplete = &Handler; 1247 } 1248 1249 /// Retrieve the current code-completion handler. getCodeCompletionHandler()1250 CodeCompletionHandler *getCodeCompletionHandler() const { 1251 return CodeComplete; 1252 } 1253 1254 /// Clear out the code completion handler. clearCodeCompletionHandler()1255 void clearCodeCompletionHandler() { 1256 CodeComplete = nullptr; 1257 } 1258 1259 /// Hook used by the lexer to invoke the "included file" code 1260 /// completion point. 1261 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1262 1263 /// Hook used by the lexer to invoke the "natural language" code 1264 /// completion point. 1265 void CodeCompleteNaturalLanguage(); 1266 1267 /// Set the code completion token for filtering purposes. setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1268 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1269 CodeCompletionII = Filter; 1270 } 1271 1272 /// Set the code completion token range for detecting replacement range later 1273 /// on. setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1274 void setCodeCompletionTokenRange(const SourceLocation Start, 1275 const SourceLocation End) { 1276 CodeCompletionTokenRange = {Start, End}; 1277 } getCodeCompletionTokenRange()1278 SourceRange getCodeCompletionTokenRange() const { 1279 return CodeCompletionTokenRange; 1280 } 1281 1282 /// Get the code completion token for filtering purposes. getCodeCompletionFilter()1283 StringRef getCodeCompletionFilter() { 1284 if (CodeCompletionII) 1285 return CodeCompletionII->getName(); 1286 return {}; 1287 } 1288 1289 /// Retrieve the preprocessing record, or NULL if there is no 1290 /// preprocessing record. getPreprocessingRecord()1291 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1292 1293 /// Create a new preprocessing record, which will keep track of 1294 /// all macro expansions, macro definitions, etc. 1295 void createPreprocessingRecord(); 1296 1297 /// Returns true if the FileEntry is the PCH through header. 1298 bool isPCHThroughHeader(const FileEntry *FE); 1299 1300 /// True if creating a PCH with a through header. 1301 bool creatingPCHWithThroughHeader(); 1302 1303 /// True if using a PCH with a through header. 1304 bool usingPCHWithThroughHeader(); 1305 1306 /// True if creating a PCH with a #pragma hdrstop. 1307 bool creatingPCHWithPragmaHdrStop(); 1308 1309 /// True if using a PCH with a #pragma hdrstop. 1310 bool usingPCHWithPragmaHdrStop(); 1311 1312 /// Skip tokens until after the #include of the through header or 1313 /// until after a #pragma hdrstop. 1314 void SkipTokensWhileUsingPCH(); 1315 1316 /// Process directives while skipping until the through header or 1317 /// #pragma hdrstop is found. 1318 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1319 SourceLocation HashLoc); 1320 1321 /// Enter the specified FileID as the main source file, 1322 /// which implicitly adds the builtin defines etc. 1323 void EnterMainSourceFile(); 1324 1325 /// Inform the preprocessor callbacks that processing is complete. 1326 void EndSourceFile(); 1327 1328 /// Add a source file to the top of the include stack and 1329 /// start lexing tokens from it instead of the current buffer. 1330 /// 1331 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1332 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir, 1333 SourceLocation Loc); 1334 1335 /// Add a Macro to the top of the include stack and start lexing 1336 /// tokens from it instead of the current buffer. 1337 /// 1338 /// \param Args specifies the tokens input to a function-like macro. 1339 /// \param ILEnd specifies the location of the ')' for a function-like macro 1340 /// or the identifier for an object-like macro. 1341 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1342 MacroArgs *Args); 1343 1344 private: 1345 /// Add a "macro" context to the top of the include stack, 1346 /// which will cause the lexer to start returning the specified tokens. 1347 /// 1348 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1349 /// will not be subject to further macro expansion. Otherwise, these tokens 1350 /// will be re-macro-expanded when/if expansion is enabled. 1351 /// 1352 /// If \p OwnsTokens is false, this method assumes that the specified stream 1353 /// of tokens has a permanent owner somewhere, so they do not need to be 1354 /// copied. If it is true, it assumes the array of tokens is allocated with 1355 /// \c new[] and the Preprocessor will delete[] it. 1356 /// 1357 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1358 /// set, see the flag documentation for details. 1359 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1360 bool DisableMacroExpansion, bool OwnsTokens, 1361 bool IsReinject); 1362 1363 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1364 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1365 bool DisableMacroExpansion, bool IsReinject) { 1366 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1367 IsReinject); 1368 } 1369 EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1370 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1371 bool IsReinject) { 1372 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1373 IsReinject); 1374 } 1375 1376 /// Pop the current lexer/macro exp off the top of the lexer stack. 1377 /// 1378 /// This should only be used in situations where the current state of the 1379 /// top-of-stack lexer is known. 1380 void RemoveTopOfLexerStack(); 1381 1382 /// From the point that this method is called, and until 1383 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1384 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1385 /// make the Preprocessor re-lex the same tokens. 1386 /// 1387 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1388 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1389 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1390 /// 1391 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1392 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1393 /// tokens will continue indefinitely. 1394 /// 1395 void EnableBacktrackAtThisPos(); 1396 1397 /// Disable the last EnableBacktrackAtThisPos call. 1398 void CommitBacktrackedTokens(); 1399 1400 /// Make Preprocessor re-lex the tokens that were lexed since 1401 /// EnableBacktrackAtThisPos() was previously called. 1402 void Backtrack(); 1403 1404 /// True if EnableBacktrackAtThisPos() was called and 1405 /// caching of tokens is on. isBacktrackEnabled()1406 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1407 1408 /// Lex the next token for this preprocessor. 1409 void Lex(Token &Result); 1410 1411 /// Lex a token, forming a header-name token if possible. 1412 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1413 1414 bool LexAfterModuleImport(Token &Result); 1415 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1416 1417 void makeModuleVisible(Module *M, SourceLocation Loc); 1418 getModuleImportLoc(Module * M)1419 SourceLocation getModuleImportLoc(Module *M) const { 1420 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1421 } 1422 1423 /// Lex a string literal, which may be the concatenation of multiple 1424 /// string literals and may even come from macro expansion. 1425 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1426 bool LexStringLiteral(Token &Result, std::string &String, 1427 const char *DiagnosticTag, bool AllowMacroExpansion) { 1428 if (AllowMacroExpansion) 1429 Lex(Result); 1430 else 1431 LexUnexpandedToken(Result); 1432 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1433 AllowMacroExpansion); 1434 } 1435 1436 /// Complete the lexing of a string literal where the first token has 1437 /// already been lexed (see LexStringLiteral). 1438 bool FinishLexStringLiteral(Token &Result, std::string &String, 1439 const char *DiagnosticTag, 1440 bool AllowMacroExpansion); 1441 1442 /// Lex a token. If it's a comment, keep lexing until we get 1443 /// something not a comment. 1444 /// 1445 /// This is useful in -E -C mode where comments would foul up preprocessor 1446 /// directive handling. LexNonComment(Token & Result)1447 void LexNonComment(Token &Result) { 1448 do 1449 Lex(Result); 1450 while (Result.getKind() == tok::comment); 1451 } 1452 1453 /// Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1454 void LexUnexpandedToken(Token &Result) { 1455 // Disable macro expansion. 1456 bool OldVal = DisableMacroExpansion; 1457 DisableMacroExpansion = true; 1458 // Lex the token. 1459 Lex(Result); 1460 1461 // Reenable it. 1462 DisableMacroExpansion = OldVal; 1463 } 1464 1465 /// Like LexNonComment, but this disables macro expansion of 1466 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1467 void LexUnexpandedNonComment(Token &Result) { 1468 do 1469 LexUnexpandedToken(Result); 1470 while (Result.getKind() == tok::comment); 1471 } 1472 1473 /// Parses a simple integer literal to get its numeric value. Floating 1474 /// point literals and user defined literals are rejected. Used primarily to 1475 /// handle pragmas that accept integer arguments. 1476 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1477 1478 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1479 void SetMacroExpansionOnlyInDirectives() { 1480 DisableMacroExpansion = true; 1481 MacroExpansionInDirectivesOverride = true; 1482 } 1483 1484 /// Peeks ahead N tokens and returns that token without consuming any 1485 /// tokens. 1486 /// 1487 /// LookAhead(0) returns the next token that would be returned by Lex(), 1488 /// LookAhead(1) returns the token after it, etc. This returns normal 1489 /// tokens after phase 5. As such, it is equivalent to using 1490 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1491 const Token &LookAhead(unsigned N) { 1492 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1493 if (CachedLexPos + N < CachedTokens.size()) 1494 return CachedTokens[CachedLexPos+N]; 1495 else 1496 return PeekAhead(N+1); 1497 } 1498 1499 /// When backtracking is enabled and tokens are cached, 1500 /// this allows to revert a specific number of tokens. 1501 /// 1502 /// Note that the number of tokens being reverted should be up to the last 1503 /// backtrack position, not more. RevertCachedTokens(unsigned N)1504 void RevertCachedTokens(unsigned N) { 1505 assert(isBacktrackEnabled() && 1506 "Should only be called when tokens are cached for backtracking"); 1507 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1508 && "Should revert tokens up to the last backtrack position, not more"); 1509 assert(signed(CachedLexPos) - signed(N) >= 0 && 1510 "Corrupted backtrack positions ?"); 1511 CachedLexPos -= N; 1512 } 1513 1514 /// Enters a token in the token stream to be lexed next. 1515 /// 1516 /// If BackTrack() is called afterwards, the token will remain at the 1517 /// insertion point. 1518 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1519 /// flag set. See the flag documentation for details. EnterToken(const Token & Tok,bool IsReinject)1520 void EnterToken(const Token &Tok, bool IsReinject) { 1521 if (LexLevel) { 1522 // It's not correct in general to enter caching lex mode while in the 1523 // middle of a nested lexing action. 1524 auto TokCopy = std::make_unique<Token[]>(1); 1525 TokCopy[0] = Tok; 1526 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1527 } else { 1528 EnterCachingLexMode(); 1529 assert(IsReinject && "new tokens in the middle of cached stream"); 1530 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1531 } 1532 } 1533 1534 /// We notify the Preprocessor that if it is caching tokens (because 1535 /// backtrack is enabled) it should replace the most recent cached tokens 1536 /// with the given annotation token. This function has no effect if 1537 /// backtracking is not enabled. 1538 /// 1539 /// Note that the use of this function is just for optimization, so that the 1540 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1541 /// invoked. AnnotateCachedTokens(const Token & Tok)1542 void AnnotateCachedTokens(const Token &Tok) { 1543 assert(Tok.isAnnotation() && "Expected annotation token"); 1544 if (CachedLexPos != 0 && isBacktrackEnabled()) 1545 AnnotatePreviousCachedTokens(Tok); 1546 } 1547 1548 /// Get the location of the last cached token, suitable for setting the end 1549 /// location of an annotation token. getLastCachedTokenLocation()1550 SourceLocation getLastCachedTokenLocation() const { 1551 assert(CachedLexPos != 0); 1552 return CachedTokens[CachedLexPos-1].getLastLoc(); 1553 } 1554 1555 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1556 /// CachedTokens. 1557 bool IsPreviousCachedToken(const Token &Tok) const; 1558 1559 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1560 /// in \p NewToks. 1561 /// 1562 /// Useful when a token needs to be split in smaller ones and CachedTokens 1563 /// most recent token must to be updated to reflect that. 1564 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1565 1566 /// Replace the last token with an annotation token. 1567 /// 1568 /// Like AnnotateCachedTokens(), this routine replaces an 1569 /// already-parsed (and resolved) token with an annotation 1570 /// token. However, this routine only replaces the last token with 1571 /// the annotation token; it does not affect any other cached 1572 /// tokens. This function has no effect if backtracking is not 1573 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1574 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1575 assert(Tok.isAnnotation() && "Expected annotation token"); 1576 if (CachedLexPos != 0 && isBacktrackEnabled()) 1577 CachedTokens[CachedLexPos-1] = Tok; 1578 } 1579 1580 /// Enter an annotation token into the token stream. 1581 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1582 void *AnnotationVal); 1583 1584 /// Determine whether it's possible for a future call to Lex to produce an 1585 /// annotation token created by a previous call to EnterAnnotationToken. mightHavePendingAnnotationTokens()1586 bool mightHavePendingAnnotationTokens() { 1587 return CurLexerKind != CLK_Lexer; 1588 } 1589 1590 /// Update the current token to represent the provided 1591 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1592 void TypoCorrectToken(const Token &Tok) { 1593 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1594 if (CachedLexPos != 0 && isBacktrackEnabled()) 1595 CachedTokens[CachedLexPos-1] = Tok; 1596 } 1597 1598 /// Recompute the current lexer kind based on the CurLexer/ 1599 /// CurTokenLexer pointers. 1600 void recomputeCurLexerKind(); 1601 1602 /// Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1603 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1604 1605 /// Enables the incremental processing 1606 void enableIncrementalProcessing(bool value = true) { 1607 IncrementalProcessing = value; 1608 } 1609 1610 /// Specify the point at which code-completion will be performed. 1611 /// 1612 /// \param File the file in which code completion should occur. If 1613 /// this file is included multiple times, code-completion will 1614 /// perform completion the first time it is included. If NULL, this 1615 /// function clears out the code-completion point. 1616 /// 1617 /// \param Line the line at which code completion should occur 1618 /// (1-based). 1619 /// 1620 /// \param Column the column at which code completion should occur 1621 /// (1-based). 1622 /// 1623 /// \returns true if an error occurred, false otherwise. 1624 bool SetCodeCompletionPoint(const FileEntry *File, 1625 unsigned Line, unsigned Column); 1626 1627 /// Determine if we are performing code completion. isCodeCompletionEnabled()1628 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1629 1630 /// Returns the location of the code-completion point. 1631 /// 1632 /// Returns an invalid location if code-completion is not enabled or the file 1633 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1634 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1635 1636 /// Returns the start location of the file of code-completion point. 1637 /// 1638 /// Returns an invalid location if code-completion is not enabled or the file 1639 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1640 SourceLocation getCodeCompletionFileLoc() const { 1641 return CodeCompletionFileLoc; 1642 } 1643 1644 /// Returns true if code-completion is enabled and we have hit the 1645 /// code-completion point. isCodeCompletionReached()1646 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1647 1648 /// Note that we hit the code-completion point. setCodeCompletionReached()1649 void setCodeCompletionReached() { 1650 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1651 CodeCompletionReached = true; 1652 // Silence any diagnostics that occur after we hit the code-completion. 1653 getDiagnostics().setSuppressAllDiagnostics(true); 1654 } 1655 1656 /// The location of the currently-active \#pragma clang 1657 /// arc_cf_code_audited begin. 1658 /// 1659 /// Returns an invalid location if there is no such pragma active. 1660 std::pair<IdentifierInfo *, SourceLocation> getPragmaARCCFCodeAuditedInfo()1661 getPragmaARCCFCodeAuditedInfo() const { 1662 return PragmaARCCFCodeAuditedInfo; 1663 } 1664 1665 /// Set the location of the currently-active \#pragma clang 1666 /// arc_cf_code_audited begin. An invalid location ends the pragma. setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1667 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 1668 SourceLocation Loc) { 1669 PragmaARCCFCodeAuditedInfo = {Ident, Loc}; 1670 } 1671 1672 /// The location of the currently-active \#pragma clang 1673 /// assume_nonnull begin. 1674 /// 1675 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()1676 SourceLocation getPragmaAssumeNonNullLoc() const { 1677 return PragmaAssumeNonNullLoc; 1678 } 1679 1680 /// Set the location of the currently-active \#pragma clang 1681 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)1682 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1683 PragmaAssumeNonNullLoc = Loc; 1684 } 1685 1686 /// Set the directory in which the main file should be considered 1687 /// to have been found, if it is not a real file. setMainFileDir(const DirectoryEntry * Dir)1688 void setMainFileDir(const DirectoryEntry *Dir) { 1689 MainFileDir = Dir; 1690 } 1691 1692 /// Instruct the preprocessor to skip part of the main source file. 1693 /// 1694 /// \param Bytes The number of bytes in the preamble to skip. 1695 /// 1696 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1697 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1698 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1699 SkipMainFilePreamble.first = Bytes; 1700 SkipMainFilePreamble.second = StartOfLine; 1701 } 1702 1703 /// Forwarding function for diagnostics. This emits a diagnostic at 1704 /// the specified Token's location, translating the token's start 1705 /// position in the current buffer into a SourcePosition object for rendering. Diag(SourceLocation Loc,unsigned DiagID)1706 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1707 return Diags->Report(Loc, DiagID); 1708 } 1709 Diag(const Token & Tok,unsigned DiagID)1710 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1711 return Diags->Report(Tok.getLocation(), DiagID); 1712 } 1713 1714 /// Return the 'spelling' of the token at the given 1715 /// location; does not go up to the spelling location or down to the 1716 /// expansion location. 1717 /// 1718 /// \param buffer A buffer which will be used only if the token requires 1719 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1720 /// \param invalid If non-null, will be set \c true if an error occurs. 1721 StringRef getSpelling(SourceLocation loc, 1722 SmallVectorImpl<char> &buffer, 1723 bool *invalid = nullptr) const { 1724 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1725 } 1726 1727 /// Return the 'spelling' of the Tok token. 1728 /// 1729 /// The spelling of a token is the characters used to represent the token in 1730 /// the source file after trigraph expansion and escaped-newline folding. In 1731 /// particular, this wants to get the true, uncanonicalized, spelling of 1732 /// things like digraphs, UCNs, etc. 1733 /// 1734 /// \param Invalid If non-null, will be set \c true if an error occurs. 1735 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1736 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1737 } 1738 1739 /// Get the spelling of a token into a preallocated buffer, instead 1740 /// of as an std::string. 1741 /// 1742 /// The caller is required to allocate enough space for the token, which is 1743 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1744 /// actual result is returned. 1745 /// 1746 /// Note that this method may do two possible things: it may either fill in 1747 /// the buffer specified with characters, or it may *change the input pointer* 1748 /// to point to a constant buffer with the data already in it (avoiding a 1749 /// copy). The caller is not allowed to modify the returned buffer pointer 1750 /// if an internal buffer is returned. 1751 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1752 bool *Invalid = nullptr) const { 1753 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1754 } 1755 1756 /// Get the spelling of a token into a SmallVector. 1757 /// 1758 /// Note that the returned StringRef may not point to the 1759 /// supplied buffer if a copy can be avoided. 1760 StringRef getSpelling(const Token &Tok, 1761 SmallVectorImpl<char> &Buffer, 1762 bool *Invalid = nullptr) const; 1763 1764 /// Relex the token at the specified location. 1765 /// \returns true if there was a failure, false on success. 1766 bool getRawToken(SourceLocation Loc, Token &Result, 1767 bool IgnoreWhiteSpace = false) { 1768 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1769 } 1770 1771 /// Given a Token \p Tok that is a numeric constant with length 1, 1772 /// return the character. 1773 char 1774 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1775 bool *Invalid = nullptr) const { 1776 assert(Tok.is(tok::numeric_constant) && 1777 Tok.getLength() == 1 && "Called on unsupported token"); 1778 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1779 1780 // If the token is carrying a literal data pointer, just use it. 1781 if (const char *D = Tok.getLiteralData()) 1782 return *D; 1783 1784 // Otherwise, fall back on getCharacterData, which is slower, but always 1785 // works. 1786 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1787 } 1788 1789 /// Retrieve the name of the immediate macro expansion. 1790 /// 1791 /// This routine starts from a source location, and finds the name of the 1792 /// macro responsible for its immediate expansion. It looks through any 1793 /// intervening macro argument expansions to compute this. It returns a 1794 /// StringRef that refers to the SourceManager-owned buffer of the source 1795 /// where that macro name is spelled. Thus, the result shouldn't out-live 1796 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)1797 StringRef getImmediateMacroName(SourceLocation Loc) { 1798 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1799 } 1800 1801 /// Plop the specified string into a scratch buffer and set the 1802 /// specified token's location and length to it. 1803 /// 1804 /// If specified, the source location provides a location of the expansion 1805 /// point of the token. 1806 void CreateString(StringRef Str, Token &Tok, 1807 SourceLocation ExpansionLocStart = SourceLocation(), 1808 SourceLocation ExpansionLocEnd = SourceLocation()); 1809 1810 /// Split the first Length characters out of the token starting at TokLoc 1811 /// and return a location pointing to the split token. Re-lexing from the 1812 /// split token will return the split token rather than the original. 1813 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 1814 1815 /// Computes the source location just past the end of the 1816 /// token at this source location. 1817 /// 1818 /// This routine can be used to produce a source location that 1819 /// points just past the end of the token referenced by \p Loc, and 1820 /// is generally used when a diagnostic needs to point just after a 1821 /// token where it expected something different that it received. If 1822 /// the returned source location would not be meaningful (e.g., if 1823 /// it points into a macro), this routine returns an invalid 1824 /// source location. 1825 /// 1826 /// \param Offset an offset from the end of the token, where the source 1827 /// location should refer to. The default offset (0) produces a source 1828 /// location pointing just past the end of the token; an offset of 1 produces 1829 /// a source location pointing to the last character in the token, etc. 1830 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1831 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1832 } 1833 1834 /// Returns true if the given MacroID location points at the first 1835 /// token of the macro expansion. 1836 /// 1837 /// \param MacroBegin If non-null and function returns true, it is set to 1838 /// begin location of the macro. 1839 bool isAtStartOfMacroExpansion(SourceLocation loc, 1840 SourceLocation *MacroBegin = nullptr) const { 1841 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1842 MacroBegin); 1843 } 1844 1845 /// Returns true if the given MacroID location points at the last 1846 /// token of the macro expansion. 1847 /// 1848 /// \param MacroEnd If non-null and function returns true, it is set to 1849 /// end location of the macro. 1850 bool isAtEndOfMacroExpansion(SourceLocation loc, 1851 SourceLocation *MacroEnd = nullptr) const { 1852 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1853 } 1854 1855 /// Print the token to stderr, used for debugging. 1856 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1857 void DumpLocation(SourceLocation Loc) const; 1858 void DumpMacro(const MacroInfo &MI) const; 1859 void dumpMacroInfo(const IdentifierInfo *II); 1860 1861 /// Given a location that specifies the start of a 1862 /// token, return a new location that specifies a character within the token. AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1863 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1864 unsigned Char) const { 1865 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1866 } 1867 1868 /// Increment the counters for the number of token paste operations 1869 /// performed. 1870 /// 1871 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)1872 void IncrementPasteCounter(bool isFast) { 1873 if (isFast) 1874 ++NumFastTokenPaste; 1875 else 1876 ++NumTokenPaste; 1877 } 1878 1879 void PrintStats(); 1880 1881 size_t getTotalMemory() const; 1882 1883 /// When the macro expander pastes together a comment (/##/) in Microsoft 1884 /// mode, this method handles updating the current state, returning the 1885 /// token on the next source line. 1886 void HandleMicrosoftCommentPaste(Token &Tok); 1887 1888 //===--------------------------------------------------------------------===// 1889 // Preprocessor callback methods. These are invoked by a lexer as various 1890 // directives and events are found. 1891 1892 /// Given a tok::raw_identifier token, look up the 1893 /// identifier information for the token and install it into the token, 1894 /// updating the token kind accordingly. 1895 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1896 1897 private: 1898 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1899 1900 public: 1901 /// Specifies the reason for poisoning an identifier. 1902 /// 1903 /// If that identifier is accessed while poisoned, then this reason will be 1904 /// used instead of the default "poisoned" diagnostic. 1905 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1906 1907 /// Display reason for poisoned identifier. 1908 void HandlePoisonedIdentifier(Token & Identifier); 1909 MaybeHandlePoisonedIdentifier(Token & Identifier)1910 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1911 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1912 if(II->isPoisoned()) { 1913 HandlePoisonedIdentifier(Identifier); 1914 } 1915 } 1916 } 1917 1918 private: 1919 /// Identifiers used for SEH handling in Borland. These are only 1920 /// allowed in particular circumstances 1921 // __except block 1922 IdentifierInfo *Ident__exception_code, 1923 *Ident___exception_code, 1924 *Ident_GetExceptionCode; 1925 // __except filter expression 1926 IdentifierInfo *Ident__exception_info, 1927 *Ident___exception_info, 1928 *Ident_GetExceptionInfo; 1929 // __finally 1930 IdentifierInfo *Ident__abnormal_termination, 1931 *Ident___abnormal_termination, 1932 *Ident_AbnormalTermination; 1933 1934 const char *getCurLexerEndPos(); 1935 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 1936 1937 public: 1938 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1939 1940 /// Callback invoked when the lexer reads an identifier and has 1941 /// filled in the tokens IdentifierInfo member. 1942 /// 1943 /// This callback potentially macro expands it or turns it into a named 1944 /// token (like 'for'). 1945 /// 1946 /// \returns true if we actually computed a token, false if we need to 1947 /// lex again. 1948 bool HandleIdentifier(Token &Identifier); 1949 1950 /// Callback invoked when the lexer hits the end of the current file. 1951 /// 1952 /// This either returns the EOF token and returns true, or 1953 /// pops a level off the include stack and returns false, at which point the 1954 /// client should call lex again. 1955 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1956 1957 /// Callback invoked when the current TokenLexer hits the end of its 1958 /// token stream. 1959 bool HandleEndOfTokenLexer(Token &Result); 1960 1961 /// Callback invoked when the lexer sees a # token at the start of a 1962 /// line. 1963 /// 1964 /// This consumes the directive, modifies the lexer/preprocessor state, and 1965 /// advances the lexer(s) so that the next token read is the correct one. 1966 void HandleDirective(Token &Result); 1967 1968 /// Ensure that the next token is a tok::eod token. 1969 /// 1970 /// If not, emit a diagnostic and consume up until the eod. 1971 /// If \p EnableMacros is true, then we consider macros that expand to zero 1972 /// tokens as being ok. 1973 /// 1974 /// \return The location of the end of the directive (the terminating 1975 /// newline). 1976 SourceLocation CheckEndOfDirective(const char *DirType, 1977 bool EnableMacros = false); 1978 1979 /// Read and discard all tokens remaining on the current line until 1980 /// the tok::eod token is found. Returns the range of the skipped tokens. 1981 SourceRange DiscardUntilEndOfDirective(); 1982 1983 /// Returns true if the preprocessor has seen a use of 1984 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()1985 bool SawDateOrTime() const { 1986 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1987 } getCounterValue()1988 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)1989 void setCounterValue(unsigned V) { CounterValue = V; } 1990 1991 /// Retrieves the module that we're currently building, if any. 1992 Module *getCurrentModule(); 1993 1994 /// Allocate a new MacroInfo object with the provided SourceLocation. 1995 MacroInfo *AllocateMacroInfo(SourceLocation L); 1996 1997 /// Turn the specified lexer token into a fully checked and spelled 1998 /// filename, e.g. as an operand of \#include. 1999 /// 2000 /// The caller is expected to provide a buffer that is large enough to hold 2001 /// the spelling of the filename, but is also expected to handle the case 2002 /// when this method decides to use a different buffer. 2003 /// 2004 /// \returns true if the input filename was in <>'s or false if it was 2005 /// in ""'s. 2006 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 2007 2008 /// Given a "foo" or \<foo> reference, look up the indicated file. 2009 /// 2010 /// Returns None on failure. \p isAngled indicates whether the file 2011 /// reference is for system \#include's or not (i.e. using <> instead of ""). 2012 Optional<FileEntryRef> 2013 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 2014 const DirectoryLookup *FromDir, const FileEntry *FromFile, 2015 const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, 2016 SmallVectorImpl<char> *RelativePath, 2017 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 2018 bool *IsFrameworkFound, bool SkipCache = false); 2019 2020 /// Get the DirectoryLookup structure used to find the current 2021 /// FileEntry, if CurLexer is non-null and if applicable. 2022 /// 2023 /// This allows us to implement \#include_next and find directory-specific 2024 /// properties. GetCurDirLookup()2025 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 2026 2027 /// Return true if we're in the top-level file, not in a \#include. 2028 bool isInPrimaryFile() const; 2029 2030 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2031 /// followed by EOD. Return true if the token is not a valid on-off-switch. 2032 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2033 2034 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2035 bool *ShadowFlag = nullptr); 2036 2037 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2038 Module *LeaveSubmodule(bool ForPragma); 2039 2040 private: 2041 friend void TokenLexer::ExpandFunctionArguments(); 2042 PushIncludeMacroStack()2043 void PushIncludeMacroStack() { 2044 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 2045 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 2046 std::move(CurLexer), CurPPLexer, 2047 std::move(CurTokenLexer), CurDirLookup); 2048 CurPPLexer = nullptr; 2049 } 2050 PopIncludeMacroStack()2051 void PopIncludeMacroStack() { 2052 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2053 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2054 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2055 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2056 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2057 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 2058 IncludeMacroStack.pop_back(); 2059 } 2060 2061 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2062 2063 /// Determine whether we need to create module macros for #defines in the 2064 /// current context. 2065 bool needModuleMacros() const; 2066 2067 /// Update the set of active module macros and ambiguity flag for a module 2068 /// macro name. 2069 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2070 2071 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2072 SourceLocation Loc); 2073 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2074 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2075 bool isPublic); 2076 2077 /// Lex and validate a macro name, which occurs after a 2078 /// \#define or \#undef. 2079 /// 2080 /// \param MacroNameTok Token that represents the name defined or undefined. 2081 /// \param IsDefineUndef Kind if preprocessor directive. 2082 /// \param ShadowFlag Points to flag that is set if macro name shadows 2083 /// a keyword. 2084 /// 2085 /// This emits a diagnostic, sets the token kind to eod, 2086 /// and discards the rest of the macro line if the macro name is invalid. 2087 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2088 bool *ShadowFlag = nullptr); 2089 2090 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2091 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2092 /// doing so performs certain validity checks including (but not limited to): 2093 /// - # (stringization) is followed by a macro parameter 2094 /// \param MacroNameTok - Token that represents the macro name 2095 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2096 /// 2097 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2098 /// returns a nullptr if an invalid sequence of tokens is encountered. 2099 MacroInfo *ReadOptionalMacroParameterListAndBody( 2100 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2101 2102 /// The ( starting an argument list of a macro definition has just been read. 2103 /// Lex the rest of the parameters and the closing ), updating \p MI with 2104 /// what we learn and saving in \p LastTok the last token read. 2105 /// Return true if an error occurs parsing the arg list. 2106 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2107 2108 /// We just read a \#if or related directive and decided that the 2109 /// subsequent tokens are in the \#if'd out portion of the 2110 /// file. Lex the rest of the file, until we see an \#endif. If \p 2111 /// FoundNonSkipPortion is true, then we have already emitted code for part of 2112 /// this \#if directive, so \#else/\#elif blocks should never be entered. If 2113 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 2114 /// already seen one so a \#else directive is a duplicate. When this returns, 2115 /// the caller can lex the first valid token. 2116 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, 2117 SourceLocation IfTokenLoc, 2118 bool FoundNonSkipPortion, bool FoundElse, 2119 SourceLocation ElseLoc = SourceLocation()); 2120 2121 /// Information about the result for evaluating an expression for a 2122 /// preprocessor directive. 2123 struct DirectiveEvalResult { 2124 /// Whether the expression was evaluated as true or not. 2125 bool Conditional; 2126 2127 /// True if the expression contained identifiers that were undefined. 2128 bool IncludedUndefinedIds; 2129 2130 /// The source range for the expression. 2131 SourceRange ExprRange; 2132 }; 2133 2134 /// Evaluate an integer constant expression that may occur after a 2135 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2136 /// 2137 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2138 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 2139 2140 /// Install the standard preprocessor pragmas: 2141 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2142 void RegisterBuiltinPragmas(); 2143 2144 /// Register builtin macros such as __LINE__ with the identifier table. 2145 void RegisterBuiltinMacros(); 2146 2147 /// If an identifier token is read that is to be expanded as a macro, handle 2148 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2149 /// otherwise the caller should lex again. 2150 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2151 2152 /// Cache macro expanded tokens for TokenLexers. 2153 // 2154 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2155 /// going to lex in the cache and when it finishes the tokens are removed 2156 /// from the end of the cache. 2157 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2158 ArrayRef<Token> tokens); 2159 2160 void removeCachedMacroExpandedTokensOfLastLexer(); 2161 2162 /// Determine whether the next preprocessor token to be 2163 /// lexed is a '('. If so, consume the token and return true, if not, this 2164 /// method should have no observable side-effect on the lexed tokens. 2165 bool isNextPPTokenLParen(); 2166 2167 /// After reading "MACRO(", this method is invoked to read all of the formal 2168 /// arguments specified for the macro invocation. Returns null on error. 2169 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2170 SourceLocation &MacroEnd); 2171 2172 /// If an identifier token is read that is to be expanded 2173 /// as a builtin macro, handle it and return the next token as 'Tok'. 2174 void ExpandBuiltinMacro(Token &Tok); 2175 2176 /// Read a \c _Pragma directive, slice it up, process it, then 2177 /// return the first token after the directive. 2178 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2179 void Handle_Pragma(Token &Tok); 2180 2181 /// Like Handle_Pragma except the pragma text is not enclosed within 2182 /// a string literal. 2183 void HandleMicrosoft__pragma(Token &Tok); 2184 2185 /// Add a lexer to the top of the include stack and 2186 /// start lexing tokens from it instead of the current buffer. 2187 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 2188 2189 /// Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)2190 void setPredefinesFileID(FileID FID) { 2191 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2192 PredefinesFileID = FID; 2193 } 2194 2195 /// Set the FileID for the PCH through header. 2196 void setPCHThroughHeaderFileID(FileID FID); 2197 2198 /// Returns true if we are lexing from a file and not a 2199 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2200 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2201 return L ? !L->isPragmaLexer() : P != nullptr; 2202 } 2203 IsFileLexer(const IncludeStackInfo & I)2204 static bool IsFileLexer(const IncludeStackInfo& I) { 2205 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2206 } 2207 IsFileLexer()2208 bool IsFileLexer() const { 2209 return IsFileLexer(CurLexer.get(), CurPPLexer); 2210 } 2211 2212 //===--------------------------------------------------------------------===// 2213 // Caching stuff. 2214 void CachingLex(Token &Result); 2215 InCachingLexMode()2216 bool InCachingLexMode() const { 2217 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2218 // that we are past EOF, not that we are in CachingLex mode. 2219 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2220 } 2221 2222 void EnterCachingLexMode(); 2223 void EnterCachingLexModeUnchecked(); 2224 ExitCachingLexMode()2225 void ExitCachingLexMode() { 2226 if (InCachingLexMode()) 2227 RemoveTopOfLexerStack(); 2228 } 2229 2230 const Token &PeekAhead(unsigned N); 2231 void AnnotatePreviousCachedTokens(const Token &Tok); 2232 2233 //===--------------------------------------------------------------------===// 2234 /// Handle*Directive - implement the various preprocessor directives. These 2235 /// should side-effect the current preprocessor object so that the next call 2236 /// to Lex() will return the appropriate token next. 2237 void HandleLineDirective(); 2238 void HandleDigitDirective(Token &Tok); 2239 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2240 void HandleIdentSCCSDirective(Token &Tok); 2241 void HandleMacroPublicDirective(Token &Tok); 2242 void HandleMacroPrivateDirective(); 2243 2244 /// An additional notification that can be produced by a header inclusion or 2245 /// import to tell the parser what happened. 2246 struct ImportAction { 2247 enum ActionKind { 2248 None, 2249 ModuleBegin, 2250 ModuleImport, 2251 SkippedModuleImport, 2252 Failure, 2253 } Kind; 2254 Module *ModuleForHeader = nullptr; 2255 2256 ImportAction(ActionKind AK, Module *Mod = nullptr) KindImportAction2257 : Kind(AK), ModuleForHeader(Mod) { 2258 assert((AK == None || Mod || AK == Failure) && 2259 "no module for module action"); 2260 } 2261 }; 2262 2263 Optional<FileEntryRef> LookupHeaderIncludeOrImport( 2264 const DirectoryLookup *&CurDir, StringRef &Filename, 2265 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2266 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2267 bool &IsMapped, const DirectoryLookup *LookupFrom, 2268 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2269 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2270 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2271 2272 // File inclusion. 2273 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2274 const DirectoryLookup *LookupFrom = nullptr, 2275 const FileEntry *LookupFromFile = nullptr); 2276 ImportAction 2277 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2278 Token &FilenameTok, SourceLocation EndLoc, 2279 const DirectoryLookup *LookupFrom = nullptr, 2280 const FileEntry *LookupFromFile = nullptr); 2281 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2282 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2283 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2284 void HandleMicrosoftImportDirective(Token &Tok); 2285 2286 public: 2287 /// Check that the given module is available, producing a diagnostic if not. 2288 /// \return \c true if the check failed (because the module is not available). 2289 /// \c false if the module appears to be usable. 2290 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 2291 const TargetInfo &TargetInfo, 2292 DiagnosticsEngine &Diags, Module *M); 2293 2294 // Module inclusion testing. 2295 /// Find the module that owns the source or header file that 2296 /// \p Loc points to. If the location is in a file that was included 2297 /// into a module, or is outside any module, returns nullptr. 2298 Module *getModuleForLocation(SourceLocation Loc); 2299 2300 /// We want to produce a diagnostic at location IncLoc concerning an 2301 /// unreachable effect at location MLoc (eg, where a desired entity was 2302 /// declared or defined). Determine whether the right way to make MLoc 2303 /// reachable is by #include, and if so, what header should be included. 2304 /// 2305 /// This is not necessarily fast, and might load unexpected module maps, so 2306 /// should only be called by code that intends to produce an error. 2307 /// 2308 /// \param IncLoc The location at which the missing effect was detected. 2309 /// \param MLoc A location within an unimported module at which the desired 2310 /// effect occurred. 2311 /// \return A file that can be #included to provide the desired effect. Null 2312 /// if no such file could be determined or if a #include is not 2313 /// appropriate (eg, if a module should be imported instead). 2314 const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 2315 SourceLocation MLoc); 2316 isRecordingPreamble()2317 bool isRecordingPreamble() const { 2318 return PreambleConditionalStack.isRecording(); 2319 } 2320 hasRecordedPreamble()2321 bool hasRecordedPreamble() const { 2322 return PreambleConditionalStack.hasRecordedPreamble(); 2323 } 2324 getPreambleConditionalStack()2325 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 2326 return PreambleConditionalStack.getStack(); 2327 } 2328 setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2329 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2330 PreambleConditionalStack.setStack(s); 2331 } 2332 setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,llvm::Optional<PreambleSkipInfo> SkipInfo)2333 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s, 2334 llvm::Optional<PreambleSkipInfo> SkipInfo) { 2335 PreambleConditionalStack.startReplaying(); 2336 PreambleConditionalStack.setStack(s); 2337 PreambleConditionalStack.SkipInfo = SkipInfo; 2338 } 2339 getPreambleSkipInfo()2340 llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const { 2341 return PreambleConditionalStack.SkipInfo; 2342 } 2343 2344 private: 2345 /// After processing predefined file, initialize the conditional stack from 2346 /// the preamble. 2347 void replayPreambleConditionalStack(); 2348 2349 // Macro handling. 2350 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard); 2351 void HandleUndefDirective(); 2352 2353 // Conditional Inclusion. 2354 void HandleIfdefDirective(Token &Result, const Token &HashToken, 2355 bool isIfndef, bool ReadAnyTokensBeforeDirective); 2356 void HandleIfDirective(Token &IfToken, const Token &HashToken, 2357 bool ReadAnyTokensBeforeDirective); 2358 void HandleEndifDirective(Token &EndifToken); 2359 void HandleElseDirective(Token &Result, const Token &HashToken); 2360 void HandleElifDirective(Token &ElifToken, const Token &HashToken); 2361 2362 // Pragmas. 2363 void HandlePragmaDirective(PragmaIntroducer Introducer); 2364 2365 public: 2366 void HandlePragmaOnce(Token &OnceTok); 2367 void HandlePragmaMark(); 2368 void HandlePragmaPoison(); 2369 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2370 void HandlePragmaDependency(Token &DependencyTok); 2371 void HandlePragmaPushMacro(Token &Tok); 2372 void HandlePragmaPopMacro(Token &Tok); 2373 void HandlePragmaIncludeAlias(Token &Tok); 2374 void HandlePragmaModuleBuild(Token &Tok); 2375 void HandlePragmaHdrstop(Token &Tok); 2376 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2377 2378 // Return true and store the first token only if any CommentHandler 2379 // has inserted some tokens and getCommentRetentionState() is false. 2380 bool HandleComment(Token &result, SourceRange Comment); 2381 2382 /// A macro is used, update information about macros that need unused 2383 /// warnings. 2384 void markMacroAsUsed(MacroInfo *MI); 2385 2386 private: 2387 Optional<unsigned> 2388 getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); 2389 2390 /// Contains the currently active skipped range mappings for skipping excluded 2391 /// conditional directives. 2392 ExcludedPreprocessorDirectiveSkipMapping 2393 *ExcludedConditionalDirectiveSkipMappings; 2394 }; 2395 2396 /// Abstract base class that describes a handler that will receive 2397 /// source ranges for each of the comments encountered in the source file. 2398 class CommentHandler { 2399 public: 2400 virtual ~CommentHandler(); 2401 2402 // The handler shall return true if it has pushed any tokens 2403 // to be read using e.g. EnterToken or EnterTokenStream. 2404 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2405 }; 2406 2407 /// Abstract base class that describes a handler that will receive 2408 /// source ranges for empty lines encountered in the source file. 2409 class EmptylineHandler { 2410 public: 2411 virtual ~EmptylineHandler(); 2412 2413 // The handler handles empty lines. 2414 virtual void HandleEmptyline(SourceRange Range) = 0; 2415 }; 2416 2417 /// Registry of pragma handlers added by plugins 2418 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 2419 2420 } // namespace clang 2421 2422 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 2423