1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::Preprocessor interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H 15 #define LLVM_CLANG_LEX_PREPROCESSOR_H 16 17 #include "clang/Basic/Diagnostic.h" 18 #include "clang/Basic/IdentifierTable.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/Basic/LangOptions.h" 21 #include "clang/Basic/Module.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/MacroInfo.h" 27 #include "clang/Lex/ModuleLoader.h" 28 #include "clang/Lex/ModuleMap.h" 29 #include "clang/Lex/PPCallbacks.h" 30 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" 31 #include "clang/Lex/Token.h" 32 #include "clang/Lex/TokenLexer.h" 33 #include "llvm/ADT/ArrayRef.h" 34 #include "llvm/ADT/DenseMap.h" 35 #include "llvm/ADT/FoldingSet.h" 36 #include "llvm/ADT/FunctionExtras.h" 37 #include "llvm/ADT/None.h" 38 #include "llvm/ADT/Optional.h" 39 #include "llvm/ADT/PointerUnion.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/SmallPtrSet.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/ADT/TinyPtrVector.h" 45 #include "llvm/ADT/iterator_range.h" 46 #include "llvm/Support/Allocator.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/Registry.h" 49 #include <cassert> 50 #include <cstddef> 51 #include <cstdint> 52 #include <map> 53 #include <memory> 54 #include <string> 55 #include 
<utility> 56 #include <vector> 57 58 namespace llvm { 59 60 template<unsigned InternalLen> class SmallString; 61 62 } // namespace llvm 63 64 namespace clang { 65 66 class CodeCompletionHandler; 67 class CommentHandler; 68 class DirectoryEntry; 69 class DirectoryLookup; 70 class ExternalPreprocessorSource; 71 class FileEntry; 72 class FileManager; 73 class HeaderSearch; 74 class MacroArgs; 75 class PragmaHandler; 76 class PragmaNamespace; 77 class PreprocessingRecord; 78 class PreprocessorLexer; 79 class PreprocessorOptions; 80 class ScratchBuffer; 81 class TargetInfo; 82 83 namespace Builtin { 84 class Context; 85 } 86 87 /// Stores token information for comparing actual tokens with 88 /// predefined values. Only handles simple tokens and identifiers. 89 class TokenValue { 90 tok::TokenKind Kind; 91 IdentifierInfo *II; 92 93 public: TokenValue(tok::TokenKind Kind)94 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) { 95 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported."); 96 assert(Kind != tok::identifier && 97 "Identifiers should be created by TokenValue(IdentifierInfo *)"); 98 assert(!tok::isLiteral(Kind) && "Literals are not supported."); 99 assert(!tok::isAnnotation(Kind) && "Annotations are not supported."); 100 } 101 TokenValue(IdentifierInfo * II)102 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {} 103 104 bool operator==(const Token &Tok) const { 105 return Tok.getKind() == Kind && 106 (!II || II == Tok.getIdentifierInfo()); 107 } 108 }; 109 110 /// Context in which macro name is used. 111 enum MacroUse { 112 // other than #define or #undef 113 MU_Other = 0, 114 115 // macro name specified in #define 116 MU_Define = 1, 117 118 // macro name specified in #undef 119 MU_Undef = 2 120 }; 121 122 /// Engages in a tight little dance with the lexer to efficiently 123 /// preprocess tokens. 
///
/// Lexers know only about tokens within a single source file, and don't
/// know anything about preprocessor-level issues like the \#include stack,
/// token expansion, etc.
class Preprocessor {
  friend class VAOptDefinitionContext;
  friend class VariadicMacroScopeGuard;

  /// Callback slot accepting a `const Token &`.
  /// NOTE(review): presumably invoked for each token the preprocessor
  /// produces -- confirm against the Lex() implementation.
  llvm::unique_function<void(const clang::Token &)> OnToken;

  /// Options this preprocessor was created with (see getPreprocessorOpts()).
  std::shared_ptr<PreprocessorOptions> PPOpts;

  /// Diagnostics engine; held by pointer so that setDiagnostics() can rebind
  /// it.
  DiagnosticsEngine        *Diags;
  LangOptions       &LangOpts;

  /// Primary and auxiliary target descriptions, null until provided.
  /// NOTE(review): presumably set by Initialize(), whose documentation says
  /// both are owned by the caller -- confirm.
  const TargetInfo *Target = nullptr;
  const TargetInfo *AuxTarget = nullptr;
  FileManager       &FileMgr;
  SourceManager     &SourceMgr;
  std::unique_ptr<ScratchBuffer> ScratchBuf;
  HeaderSearch      &HeaderInfo;
  ModuleLoader      &TheModuleLoader;

  /// External source of macros.
  ExternalPreprocessorSource *ExternalSource;

  /// A BumpPtrAllocator object used to quickly allocate and release
  /// objects internal to the Preprocessor.
  llvm::BumpPtrAllocator BP;

  /// Identifiers for builtin macros and other builtins.
IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
  IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
  IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
  IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
  IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
  IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
  IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
  IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
  IdentifierInfo *Ident__identifier;               // __identifier
  IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
  IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
  IdentifierInfo *Ident__has_feature;              // __has_feature
  IdentifierInfo *Ident__has_extension;            // __has_extension
  IdentifierInfo *Ident__has_builtin;              // __has_builtin
  IdentifierInfo *Ident__has_attribute;            // __has_attribute
  IdentifierInfo *Ident__has_include;              // __has_include
  IdentifierInfo *Ident__has_include_next;         // __has_include_next
  IdentifierInfo *Ident__has_warning;              // __has_warning
  IdentifierInfo *Ident__is_identifier;            // __is_identifier
  IdentifierInfo *Ident__building_module;          // __building_module
  IdentifierInfo *Ident__MODULE__;                 // __MODULE__
  IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
  IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
  IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
  IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
  IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
  IdentifierInfo *Ident__is_target_os;             // __is_target_os
  IdentifierInfo *Ident__is_target_environment;    // __is_target_environment

  // Weak (non-owning) pointer; only valid (and set) while InMacroArgs is true.
  Token* ArgMacro;

  // NOTE(review): presumably the cached locations used when expanding
  // __DATE__ and __TIME__ -- confirm against the builtin-macro expansion code.
  SourceLocation DATELoc, TIMELoc;

  // Next __COUNTER__ value, starts at 0.
  unsigned CounterValue = 0;

  enum {
    /// Maximum depth of \#includes.
    MaxAllowedIncludeStackDepth = 200
  };

  // State that is set before the preprocessor begins.
  bool KeepComments : 1;
  bool KeepMacroComments : 1;
  bool SuppressIncludeNotFoundError : 1;

  // State that changes while the preprocessor runs:
  bool InMacroArgs : 1;            // True if parsing fn macro invocation args.

  /// Whether the preprocessor owns the header search object.
  bool OwnsHeaderSearch : 1;

  /// True if macro expansion is disabled.
  bool DisableMacroExpansion : 1;

  /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
  /// when parsing preprocessor directives.
  bool MacroExpansionInDirectivesOverride : 1;

  class ResetMacroExpansionHelper;

  /// Whether we have already loaded macros from the external source.
  mutable bool ReadMacrosFromExternalSource : 1;

  /// True if pragmas are enabled.
  bool PragmasEnabled : 1;

  /// True if the current build action is a preprocessing action.
  bool PreprocessedOutput : 1;

  /// True if we are currently preprocessing a #if or #elif directive.
  ///
  /// Unlike the flags above this is a full bool, not a bit-field, and it has
  /// no in-class initializer.
  /// NOTE(review): presumably initialized by the constructor -- confirm.
  bool ParsingIfOrElifDirective;

  /// True if we are pre-expanding macro arguments.
  ///
  /// Also a full bool without an in-class initializer.
  /// NOTE(review): presumably initialized by the constructor -- confirm.
  bool InMacroArgPreExpansion;

  /// Mapping/lookup information for all identifiers in
  /// the program, including program keywords.
  mutable IdentifierTable Identifiers;

  /// This table contains all the selectors in the program.
  ///
  /// Unlike IdentifierTable above, this table *isn't* populated by the
  /// preprocessor. It is declared/expanded here because its role/lifetime is
  /// conceptually similar to the IdentifierTable. In addition, the current
  /// control flow (in clang::ParseAST()) makes it convenient to put here.
  ///
  /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
  /// the lifetime of the preprocessor.
  SelectorTable Selectors;

  /// Information about builtins.
std::unique_ptr<Builtin::Context> BuiltinInfo;

  /// Tracks all of the pragmas that the client registered
  /// with this preprocessor.
  std::unique_ptr<PragmaNamespace> PragmaHandlers;

  /// Pragma handlers of the original source are stored here during the
  /// parsing of a model file.
  std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;

  /// Tracks all of the comment handlers that the client registered
  /// with this preprocessor.
  ///
  /// The handlers are held as raw pointers; this vector does not own them.
  std::vector<CommentHandler *> CommentHandlers;

  /// True if we want to ignore EOF token and continue later on (thus
  /// avoid tearing the Lexer and etc. down).
  bool IncrementalProcessing = false;

  /// The kind of translation unit we are processing.
  TranslationUnitKind TUKind;

  /// The code-completion handler.
  CodeCompletionHandler *CodeComplete = nullptr;

  /// The file that we're performing code-completion for, if any.
  const FileEntry *CodeCompletionFile = nullptr;

  /// The offset in file for the code-completion point.
  unsigned CodeCompletionOffset = 0;

  /// The location for the code-completion point. This gets instantiated
  /// when the CodeCompletionFile gets \#include'ed for preprocessing.
  SourceLocation CodeCompletionLoc;

  /// The start location for the file of the code-completion point.
  ///
  /// This gets instantiated when the CodeCompletionFile gets \#include'ed
  /// for preprocessing.
  SourceLocation CodeCompletionFileLoc;

  /// The source location of the \c import contextual keyword we just
  /// lexed, if any.
  SourceLocation ModuleImportLoc;

  /// The module import path that we're currently processing, stored as
  /// (identifier, location) pairs.
  SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;

  /// Whether the last token we lexed was an '@'.
  bool LastTokenWasAt = false;

  /// A position within a C++20 import-seq.
296 class ImportSeq { 297 public: 298 enum State : int { 299 // Positive values represent a number of unclosed brackets. 300 AtTopLevel = 0, 301 AfterTopLevelTokenSeq = -1, 302 AfterExport = -2, 303 AfterImportSeq = -3, 304 }; 305 ImportSeq(State S)306 ImportSeq(State S) : S(S) {} 307 308 /// Saw any kind of open bracket. handleOpenBracket()309 void handleOpenBracket() { 310 S = static_cast<State>(std::max<int>(S, 0) + 1); 311 } 312 /// Saw any kind of close bracket other than '}'. handleCloseBracket()313 void handleCloseBracket() { 314 S = static_cast<State>(std::max<int>(S, 1) - 1); 315 } 316 /// Saw a close brace. handleCloseBrace()317 void handleCloseBrace() { 318 handleCloseBracket(); 319 if (S == AtTopLevel && !AfterHeaderName) 320 S = AfterTopLevelTokenSeq; 321 } 322 /// Saw a semicolon. handleSemi()323 void handleSemi() { 324 if (atTopLevel()) { 325 S = AfterTopLevelTokenSeq; 326 AfterHeaderName = false; 327 } 328 } 329 330 /// Saw an 'export' identifier. handleExport()331 void handleExport() { 332 if (S == AfterTopLevelTokenSeq) 333 S = AfterExport; 334 else if (S <= 0) 335 S = AtTopLevel; 336 } 337 /// Saw an 'import' identifier. handleImport()338 void handleImport() { 339 if (S == AfterTopLevelTokenSeq || S == AfterExport) 340 S = AfterImportSeq; 341 else if (S <= 0) 342 S = AtTopLevel; 343 } 344 345 /// Saw a 'header-name' token; do not recognize any more 'import' tokens 346 /// until we reach a top-level semicolon. handleHeaderName()347 void handleHeaderName() { 348 if (S == AfterImportSeq) 349 AfterHeaderName = true; 350 handleMisc(); 351 } 352 353 /// Saw any other token. handleMisc()354 void handleMisc() { 355 if (S <= 0) 356 S = AtTopLevel; 357 } 358 atTopLevel()359 bool atTopLevel() { return S <= 0; } afterImportSeq()360 bool afterImportSeq() { return S == AfterImportSeq; } 361 362 private: 363 State S; 364 /// Whether we're in the pp-import-suffix following the header-name in a 365 /// pp-import. 
If so, a close-brace is not sufficient to end the 366 /// top-level-token-seq of an import-seq. 367 bool AfterHeaderName = false; 368 }; 369 370 /// Our current position within a C++20 import-seq. 371 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq; 372 373 /// Whether the module import expects an identifier next. Otherwise, 374 /// it expects a '.' or ';'. 375 bool ModuleImportExpectsIdentifier = false; 376 377 /// The identifier and source location of the currently-active 378 /// \#pragma clang arc_cf_code_audited begin. 379 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo; 380 381 /// The source location of the currently-active 382 /// \#pragma clang assume_nonnull begin. 383 SourceLocation PragmaAssumeNonNullLoc; 384 385 /// True if we hit the code-completion point. 386 bool CodeCompletionReached = false; 387 388 /// The code completion token containing the information 389 /// on the stem that is to be code completed. 390 IdentifierInfo *CodeCompletionII = nullptr; 391 392 /// Range for the code completion token. 393 SourceRange CodeCompletionTokenRange; 394 395 /// The directory that the main file should be considered to occupy, 396 /// if it does not correspond to a real file (as happens when building a 397 /// module). 398 const DirectoryEntry *MainFileDir = nullptr; 399 400 /// The number of bytes that we will initially skip when entering the 401 /// main file, along with a flag that indicates whether skipping this number 402 /// of bytes will place the lexer at the start of a line. 403 /// 404 /// This is used when loading a precompiled preamble. 405 std::pair<int, bool> SkipMainFilePreamble; 406 407 /// Whether we hit an error due to reaching max allowed include depth. Allows 408 /// to avoid hitting the same error over and over again. 409 bool HasReachedMaxIncludeDepth = false; 410 411 /// The number of currently-active calls to Lex. 
412 /// 413 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often 414 /// require asking for multiple additional tokens. This counter makes it 415 /// possible for Lex to detect whether it's producing a token for the end 416 /// of phase 4 of translation or for some other situation. 417 unsigned LexLevel = 0; 418 419 /// The number of (LexLevel 0) preprocessor tokens. 420 unsigned TokenCount = 0; 421 422 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens 423 /// warning, or zero for unlimited. 424 unsigned MaxTokens = 0; 425 SourceLocation MaxTokensOverrideLoc; 426 427 public: 428 struct PreambleSkipInfo { 429 SourceLocation HashTokenLoc; 430 SourceLocation IfTokenLoc; 431 bool FoundNonSkipPortion; 432 bool FoundElse; 433 SourceLocation ElseLoc; 434 PreambleSkipInfoPreambleSkipInfo435 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc, 436 bool FoundNonSkipPortion, bool FoundElse, 437 SourceLocation ElseLoc) 438 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc), 439 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse), 440 ElseLoc(ElseLoc) {} 441 }; 442 443 private: 444 friend class ASTReader; 445 friend class MacroArgs; 446 447 class PreambleConditionalStackStore { 448 enum State { 449 Off = 0, 450 Recording = 1, 451 Replaying = 2, 452 }; 453 454 public: 455 PreambleConditionalStackStore() = default; 456 startRecording()457 void startRecording() { ConditionalStackState = Recording; } startReplaying()458 void startReplaying() { ConditionalStackState = Replaying; } isRecording()459 bool isRecording() const { return ConditionalStackState == Recording; } isReplaying()460 bool isReplaying() const { return ConditionalStackState == Replaying; } 461 getStack()462 ArrayRef<PPConditionalInfo> getStack() const { 463 return ConditionalStack; 464 } 465 doneReplaying()466 void doneReplaying() { 467 ConditionalStack.clear(); 468 ConditionalStackState = Off; 469 } 470 
setStack(ArrayRef<PPConditionalInfo> s)471 void setStack(ArrayRef<PPConditionalInfo> s) { 472 if (!isRecording() && !isReplaying()) 473 return; 474 ConditionalStack.clear(); 475 ConditionalStack.append(s.begin(), s.end()); 476 } 477 hasRecordedPreamble()478 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); } 479 reachedEOFWhileSkipping()480 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); } 481 clearSkipInfo()482 void clearSkipInfo() { SkipInfo.reset(); } 483 484 llvm::Optional<PreambleSkipInfo> SkipInfo; 485 486 private: 487 SmallVector<PPConditionalInfo, 4> ConditionalStack; 488 State ConditionalStackState = Off; 489 } PreambleConditionalStack; 490 491 /// The current top of the stack that we're lexing from if 492 /// not expanding a macro and we are lexing directly from source code. 493 /// 494 /// Only one of CurLexer, or CurTokenLexer will be non-null. 495 std::unique_ptr<Lexer> CurLexer; 496 497 /// The current top of the stack what we're lexing from 498 /// if not expanding a macro. 499 /// 500 /// This is an alias for CurLexer. 501 PreprocessorLexer *CurPPLexer = nullptr; 502 503 /// Used to find the current FileEntry, if CurLexer is non-null 504 /// and if applicable. 505 /// 506 /// This allows us to implement \#include_next and find directory-specific 507 /// properties. 508 const DirectoryLookup *CurDirLookup = nullptr; 509 510 /// The current macro we are expanding, if we are expanding a macro. 511 /// 512 /// One of CurLexer and CurTokenLexer must be null. 513 std::unique_ptr<TokenLexer> CurTokenLexer; 514 515 /// The kind of lexer we're currently working with. 516 enum CurLexerKind { 517 CLK_Lexer, 518 CLK_TokenLexer, 519 CLK_CachingLexer, 520 CLK_LexAfterModuleImport 521 } CurLexerKind = CLK_Lexer; 522 523 /// If the current lexer is for a submodule that is being built, this 524 /// is that submodule. 
525 Module *CurLexerSubmodule = nullptr; 526 527 /// Keeps track of the stack of files currently 528 /// \#included, and macros currently being expanded from, not counting 529 /// CurLexer/CurTokenLexer. 530 struct IncludeStackInfo { 531 enum CurLexerKind CurLexerKind; 532 Module *TheSubmodule; 533 std::unique_ptr<Lexer> TheLexer; 534 PreprocessorLexer *ThePPLexer; 535 std::unique_ptr<TokenLexer> TheTokenLexer; 536 const DirectoryLookup *TheDirLookup; 537 538 // The following constructors are completely useless copies of the default 539 // versions, only needed to pacify MSVC. IncludeStackInfoIncludeStackInfo540 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, 541 std::unique_ptr<Lexer> &&TheLexer, 542 PreprocessorLexer *ThePPLexer, 543 std::unique_ptr<TokenLexer> &&TheTokenLexer, 544 const DirectoryLookup *TheDirLookup) 545 : CurLexerKind(std::move(CurLexerKind)), 546 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), 547 ThePPLexer(std::move(ThePPLexer)), 548 TheTokenLexer(std::move(TheTokenLexer)), 549 TheDirLookup(std::move(TheDirLookup)) {} 550 }; 551 std::vector<IncludeStackInfo> IncludeMacroStack; 552 553 /// Actions invoked when some preprocessor activity is 554 /// encountered (e.g. a file is \#included, etc). 555 std::unique_ptr<PPCallbacks> Callbacks; 556 557 struct MacroExpandsInfo { 558 Token Tok; 559 MacroDefinition MD; 560 SourceRange Range; 561 MacroExpandsInfoMacroExpandsInfo562 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range) 563 : Tok(Tok), MD(MD), Range(Range) {} 564 }; 565 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks; 566 567 /// Information about a name that has been used to define a module macro. 568 struct ModuleMacroInfo { 569 /// The most recent macro directive for this identifier. 570 MacroDirective *MD; 571 572 /// The active module macros for this identifier. 
573 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros; 574 575 /// The generation number at which we last updated ActiveModuleMacros. 576 /// \see Preprocessor::VisibleModules. 577 unsigned ActiveModuleMacrosGeneration = 0; 578 579 /// Whether this macro name is ambiguous. 580 bool IsAmbiguous = false; 581 582 /// The module macros that are overridden by this macro. 583 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros; 584 ModuleMacroInfoModuleMacroInfo585 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {} 586 }; 587 588 /// The state of a macro for an identifier. 589 class MacroState { 590 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State; 591 getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)592 ModuleMacroInfo *getModuleInfo(Preprocessor &PP, 593 const IdentifierInfo *II) const { 594 if (II->isOutOfDate()) 595 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 596 // FIXME: Find a spare bit on IdentifierInfo and store a 597 // HasModuleMacros flag. 
598 if (!II->hasMacroDefinition() || 599 (!PP.getLangOpts().Modules && 600 !PP.getLangOpts().ModulesLocalVisibility) || 601 !PP.CurSubmoduleState->VisibleModules.getGeneration()) 602 return nullptr; 603 604 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 605 if (!Info) { 606 Info = new (PP.getPreprocessorAllocator()) 607 ModuleMacroInfo(State.get<MacroDirective *>()); 608 State = Info; 609 } 610 611 if (PP.CurSubmoduleState->VisibleModules.getGeneration() != 612 Info->ActiveModuleMacrosGeneration) 613 PP.updateModuleMacroInfo(II, *Info); 614 return Info; 615 } 616 617 public: MacroState()618 MacroState() : MacroState(nullptr) {} MacroState(MacroDirective * MD)619 MacroState(MacroDirective *MD) : State(MD) {} 620 MacroState(MacroState && O)621 MacroState(MacroState &&O) noexcept : State(O.State) { 622 O.State = (MacroDirective *)nullptr; 623 } 624 625 MacroState &operator=(MacroState &&O) noexcept { 626 auto S = O.State; 627 O.State = (MacroDirective *)nullptr; 628 State = S; 629 return *this; 630 } 631 ~MacroState()632 ~MacroState() { 633 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 634 Info->~ModuleMacroInfo(); 635 } 636 getLatest()637 MacroDirective *getLatest() const { 638 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 639 return Info->MD; 640 return State.get<MacroDirective*>(); 641 } 642 setLatest(MacroDirective * MD)643 void setLatest(MacroDirective *MD) { 644 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 645 Info->MD = MD; 646 else 647 State = MD; 648 } 649 isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)650 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const { 651 auto *Info = getModuleInfo(PP, II); 652 return Info ? 
Info->IsAmbiguous : false; 653 } 654 655 ArrayRef<ModuleMacro *> getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)656 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { 657 if (auto *Info = getModuleInfo(PP, II)) 658 return Info->ActiveModuleMacros; 659 return None; 660 } 661 findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)662 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, 663 SourceManager &SourceMgr) const { 664 // FIXME: Incorporate module macros into the result of this. 665 if (auto *Latest = getLatest()) 666 return Latest->findDirectiveAtLoc(Loc, SourceMgr); 667 return {}; 668 } 669 overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)670 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) { 671 if (auto *Info = getModuleInfo(PP, II)) { 672 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 673 Info->ActiveModuleMacros.begin(), 674 Info->ActiveModuleMacros.end()); 675 Info->ActiveModuleMacros.clear(); 676 Info->IsAmbiguous = false; 677 } 678 } 679 getOverriddenMacros()680 ArrayRef<ModuleMacro*> getOverriddenMacros() const { 681 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>()) 682 return Info->OverriddenMacros; 683 return None; 684 } 685 setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)686 void setOverriddenMacros(Preprocessor &PP, 687 ArrayRef<ModuleMacro *> Overrides) { 688 auto *Info = State.dyn_cast<ModuleMacroInfo*>(); 689 if (!Info) { 690 if (Overrides.empty()) 691 return; 692 Info = new (PP.getPreprocessorAllocator()) 693 ModuleMacroInfo(State.get<MacroDirective *>()); 694 State = Info; 695 } 696 Info->OverriddenMacros.clear(); 697 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(), 698 Overrides.begin(), Overrides.end()); 699 Info->ActiveModuleMacrosGeneration = 0; 700 } 701 }; 702 703 /// For each IdentifierInfo that was associated with a macro, we 704 /// keep a mapping to the history of all macro 
definitions and #undefs in 705 /// the reverse order (the latest one is in the head of the list). 706 /// 707 /// This mapping lives within the \p CurSubmoduleState. 708 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>; 709 710 struct SubmoduleState; 711 712 /// Information about a submodule that we're currently building. 713 struct BuildingSubmoduleInfo { 714 /// The module that we are building. 715 Module *M; 716 717 /// The location at which the module was included. 718 SourceLocation ImportLoc; 719 720 /// Whether we entered this submodule via a pragma. 721 bool IsPragma; 722 723 /// The previous SubmoduleState. 724 SubmoduleState *OuterSubmoduleState; 725 726 /// The number of pending module macro names when we started building this. 727 unsigned OuterPendingModuleMacroNames; 728 BuildingSubmoduleInfoBuildingSubmoduleInfo729 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma, 730 SubmoduleState *OuterSubmoduleState, 731 unsigned OuterPendingModuleMacroNames) 732 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma), 733 OuterSubmoduleState(OuterSubmoduleState), 734 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {} 735 }; 736 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack; 737 738 /// Information about a submodule's preprocessor state. 739 struct SubmoduleState { 740 /// The macros for the submodule. 741 MacroMap Macros; 742 743 /// The set of modules that are visible within the submodule. 744 VisibleModuleSet VisibleModules; 745 746 // FIXME: CounterValue? 747 // FIXME: PragmaPushMacroInfo? 748 }; 749 std::map<Module *, SubmoduleState> Submodules; 750 751 /// The preprocessor state for preprocessing outside of any submodule. 752 SubmoduleState NullSubmoduleState; 753 754 /// The current submodule state. Will be \p NullSubmoduleState if we're not 755 /// in a submodule. 756 SubmoduleState *CurSubmoduleState; 757 758 /// The set of known macros exported from modules. 
llvm::FoldingSet<ModuleMacro> ModuleMacros;

  /// The names of potential module macros that we've not yet processed.
  llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;

  /// The list of module macros, for each identifier, that are not overridden by
  /// any other module macro.
  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
      LeafModuleMacros;

  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
  ///
  /// We store just their SourceLocations instead of
  /// something like MacroInfo*. The benefit of this is that when we are
  /// deserializing from PCH, we don't need to deserialize identifier & macros
  /// just so that we can report that they are unused, we just warn using
  /// the SourceLocations of this set (that will be filled by the ASTReader).
  /// We are using SmallPtrSet instead of a vector for faster removal.
  using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;

  /// A "freelist" of MacroArg objects that can be
  /// reused for quick allocation.
  MacroArgs *MacroArgCache = nullptr;

  /// For each IdentifierInfo used in a \#pragma push_macro directive,
  /// we keep a MacroInfo stack used to restore the previous macro value.
  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
      PragmaPushMacroInfo;

  // Various statistics we track for performance analysis.
unsigned NumDirectives = 0;
  unsigned NumDefined = 0;
  unsigned NumUndefined = 0;
  unsigned NumPragma = 0;
  unsigned NumIf = 0;
  unsigned NumElse = 0;
  unsigned NumEndif = 0;
  unsigned NumEnteredSourceFiles = 0;
  unsigned MaxIncludeStackDepth = 0;
  unsigned NumMacroExpanded = 0;
  unsigned NumFnMacroExpanded = 0;
  unsigned NumBuiltinMacroExpanded = 0;
  unsigned NumFastMacroExpanded = 0;
  unsigned NumTokenPaste = 0;
  unsigned NumFastTokenPaste = 0;
  unsigned NumSkipped = 0;

  /// The predefined macros that preprocessor should use from the
  /// command line etc.
  std::string Predefines;

  /// The file ID for the preprocessor predefines.
  FileID PredefinesFileID;

  /// The file ID for the PCH through header.
  FileID PCHThroughHeaderFileID;

  /// Whether tokens are being skipped until a #pragma hdrstop is seen.
  bool SkippingUntilPragmaHdrStop = false;

  /// Whether tokens are being skipped until the through header is seen.
  bool SkippingUntilPCHThroughHeader = false;

  /// \{
  /// Cache of macro expanders to reduce malloc traffic.
  ///
  /// NumCachedTokenLexers has no in-class initializer.
  /// NOTE(review): presumably set by the constructor -- confirm.
  enum { TokenLexerCacheSize = 8 };
  unsigned NumCachedTokenLexers;
  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
  /// \}

  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are
  /// removed from the end of the cache.
  SmallVector<Token, 16> MacroExpandedTokens;
  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;

  /// A record of the macro definitions and expansions that
  /// occurred during preprocessing.
  ///
  /// This is an optional side structure that can be enabled with
  /// \c createPreprocessingRecord() prior to preprocessing.
PreprocessingRecord *Record = nullptr;

  /// Cached tokens state.
  using CachedTokensTy = SmallVector<Token, 1>;

  /// Cached tokens are stored here when we do backtracking or
  /// lookahead. They are "lexed" by the CachingLex() method.
  CachedTokensTy CachedTokens;

  /// The position of the cached token that CachingLex() should
  /// "lex" next.
  ///
  /// If it points beyond the CachedTokens vector, it means that a normal
  /// Lex() should be invoked.
  CachedTokensTy::size_type CachedLexPos = 0;

  /// Stack of backtrack positions, allowing nested backtracks.
  ///
  /// The EnableBacktrackAtThisPos() method pushes a position to
  /// indicate where CachedLexPos should be set when the BackTrack() method is
  /// invoked (at which point the last position is popped).
  std::vector<CachedTokensTy::size_type> BacktrackPositions;

  /// Node of a singly linked list of MacroInfo objects: the MacroInfo is
  /// stored by value, followed by the link to the next node.
  struct MacroInfoChain {
    MacroInfo MI;
    MacroInfoChain *Next;
  };

  /// MacroInfos are managed as a chain for easy disposal.  This is the head
  /// of that list.
  MacroInfoChain *MIChainHead = nullptr;

  /// Called by MacroState when an identifier reports isOutOfDate(), before
  /// its macro state is consulted.
  /// NOTE(review): presumably asks the external source to refresh \p II --
  /// confirm against the definition.
  void updateOutOfDateIdentifier(IdentifierInfo &II) const;

public:
  /// Construct a preprocessor.
  ///
  /// \param IILookup optional; defaults to null.
  /// \param OwnsHeaderSearch defaults to false. NOTE(review): presumably
  /// stored in the OwnsHeaderSearch flag, making the preprocessor responsible
  /// for \p Headers -- confirm against the constructor definition.
  /// \param TUKind the kind of translation unit; defaults to TU_Complete.
  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
               DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
               HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
               IdentifierInfoLookup *IILookup = nullptr,
               bool OwnsHeaderSearch = false,
               TranslationUnitKind TUKind = TU_Complete);

  ~Preprocessor();

  /// Initialize the preprocessor using information about the target.
  ///
  /// \param Target is owned by the caller and must remain valid for the
  /// lifetime of the preprocessor.
  /// \param AuxTarget is owned by the caller and must remain valid for
  /// the lifetime of the preprocessor.
894 void Initialize(const TargetInfo &Target, 895 const TargetInfo *AuxTarget = nullptr); 896 897 /// Initialize the preprocessor to parse a model file 898 /// 899 /// To parse model files the preprocessor of the original source is reused to 900 /// preserver the identifier table. However to avoid some duplicate 901 /// information in the preprocessor some cleanup is needed before it is used 902 /// to parse model files. This method does that cleanup. 903 void InitializeForModelFile(); 904 905 /// Cleanup after model file parsing 906 void FinalizeForModelFile(); 907 908 /// Retrieve the preprocessor options used to initialize this 909 /// preprocessor. getPreprocessorOpts()910 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; } 911 getDiagnostics()912 DiagnosticsEngine &getDiagnostics() const { return *Diags; } setDiagnostics(DiagnosticsEngine & D)913 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; } 914 getLangOpts()915 const LangOptions &getLangOpts() const { return LangOpts; } getTargetInfo()916 const TargetInfo &getTargetInfo() const { return *Target; } getAuxTargetInfo()917 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } getFileManager()918 FileManager &getFileManager() const { return FileMgr; } getSourceManager()919 SourceManager &getSourceManager() const { return SourceMgr; } getHeaderSearchInfo()920 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } 921 getIdentifierTable()922 IdentifierTable &getIdentifierTable() { return Identifiers; } getIdentifierTable()923 const IdentifierTable &getIdentifierTable() const { return Identifiers; } getSelectorTable()924 SelectorTable &getSelectorTable() { return Selectors; } getBuiltinInfo()925 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; } getPreprocessorAllocator()926 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; } 927 setExternalSource(ExternalPreprocessorSource * Source)928 void setExternalSource(ExternalPreprocessorSource *Source) 
{ 929 ExternalSource = Source; 930 } 931 getExternalSource()932 ExternalPreprocessorSource *getExternalSource() const { 933 return ExternalSource; 934 } 935 936 /// Retrieve the module loader associated with this preprocessor. getModuleLoader()937 ModuleLoader &getModuleLoader() const { return TheModuleLoader; } 938 hadModuleLoaderFatalFailure()939 bool hadModuleLoaderFatalFailure() const { 940 return TheModuleLoader.HadFatalFailure; 941 } 942 943 /// Retrieve the number of Directives that have been processed by the 944 /// Preprocessor. getNumDirectives()945 unsigned getNumDirectives() const { 946 return NumDirectives; 947 } 948 949 /// True if we are currently preprocessing a #if or #elif directive isParsingIfOrElifDirective()950 bool isParsingIfOrElifDirective() const { 951 return ParsingIfOrElifDirective; 952 } 953 954 /// Control whether the preprocessor retains comments in output. SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)955 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { 956 this->KeepComments = KeepComments | KeepMacroComments; 957 this->KeepMacroComments = KeepMacroComments; 958 } 959 getCommentRetentionState()960 bool getCommentRetentionState() const { return KeepComments; } 961 setPragmasEnabled(bool Enabled)962 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; } getPragmasEnabled()963 bool getPragmasEnabled() const { return PragmasEnabled; } 964 SetSuppressIncludeNotFoundError(bool Suppress)965 void SetSuppressIncludeNotFoundError(bool Suppress) { 966 SuppressIncludeNotFoundError = Suppress; 967 } 968 GetSuppressIncludeNotFoundError()969 bool GetSuppressIncludeNotFoundError() { 970 return SuppressIncludeNotFoundError; 971 } 972 973 /// Sets whether the preprocessor is responsible for producing output or if 974 /// it is producing tokens to be consumed by Parse and Sema. 
setPreprocessedOutput(bool IsPreprocessedOutput)975 void setPreprocessedOutput(bool IsPreprocessedOutput) { 976 PreprocessedOutput = IsPreprocessedOutput; 977 } 978 979 /// Returns true if the preprocessor is responsible for generating output, 980 /// false if it is producing tokens to be consumed by Parse and Sema. isPreprocessedOutput()981 bool isPreprocessedOutput() const { return PreprocessedOutput; } 982 983 /// Return true if we are lexing directly from the specified lexer. isCurrentLexer(const PreprocessorLexer * L)984 bool isCurrentLexer(const PreprocessorLexer *L) const { 985 return CurPPLexer == L; 986 } 987 988 /// Return the current lexer being lexed from. 989 /// 990 /// Note that this ignores any potentially active macro expansions and _Pragma 991 /// expansions going on at the time. getCurrentLexer()992 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; } 993 994 /// Return the current file lexer being lexed from. 995 /// 996 /// Note that this ignores any potentially active macro expansions and _Pragma 997 /// expansions going on at the time. 998 PreprocessorLexer *getCurrentFileLexer() const; 999 1000 /// Return the submodule owning the file being lexed. This may not be 1001 /// the current module if we have changed modules since entering the file. getCurrentLexerSubmodule()1002 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; } 1003 1004 /// Returns the FileID for the preprocessor predefines. getPredefinesFileID()1005 FileID getPredefinesFileID() const { return PredefinesFileID; } 1006 1007 /// \{ 1008 /// Accessors for preprocessor callbacks. 1009 /// 1010 /// Note that this class takes ownership of any PPCallbacks object given to 1011 /// it. 
getPPCallbacks()1012 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } addPPCallbacks(std::unique_ptr<PPCallbacks> C)1013 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) { 1014 if (Callbacks) 1015 C = std::make_unique<PPChainedCallbacks>(std::move(C), 1016 std::move(Callbacks)); 1017 Callbacks = std::move(C); 1018 } 1019 /// \} 1020 1021 /// Get the number of tokens processed so far. getTokenCount()1022 unsigned getTokenCount() const { return TokenCount; } 1023 1024 /// Get the max number of tokens before issuing a -Wmax-tokens warning. getMaxTokens()1025 unsigned getMaxTokens() const { return MaxTokens; } 1026 overrideMaxTokens(unsigned Value,SourceLocation Loc)1027 void overrideMaxTokens(unsigned Value, SourceLocation Loc) { 1028 MaxTokens = Value; 1029 MaxTokensOverrideLoc = Loc; 1030 }; 1031 getMaxTokensOverrideLoc()1032 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; } 1033 1034 /// Register a function that would be called on each token in the final 1035 /// expanded token stream. 1036 /// This also reports annotation tokens produced by the parser. setTokenWatcher(llvm::unique_function<void (const clang::Token &)> F)1037 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) { 1038 OnToken = std::move(F); 1039 } 1040 isMacroDefined(StringRef Id)1041 bool isMacroDefined(StringRef Id) { 1042 return isMacroDefined(&Identifiers.get(Id)); 1043 } isMacroDefined(const IdentifierInfo * II)1044 bool isMacroDefined(const IdentifierInfo *II) { 1045 return II->hasMacroDefinition() && 1046 (!getLangOpts().Modules || (bool)getMacroDefinition(II)); 1047 } 1048 1049 /// Determine whether II is defined as a macro within the module M, 1050 /// if that is a module that we've already preprocessed. Does not check for 1051 /// macros imported into M. 
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)1052 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) { 1053 if (!II->hasMacroDefinition()) 1054 return false; 1055 auto I = Submodules.find(M); 1056 if (I == Submodules.end()) 1057 return false; 1058 auto J = I->second.Macros.find(II); 1059 if (J == I->second.Macros.end()) 1060 return false; 1061 auto *MD = J->second.getLatest(); 1062 return MD && MD->isDefined(); 1063 } 1064 getMacroDefinition(const IdentifierInfo * II)1065 MacroDefinition getMacroDefinition(const IdentifierInfo *II) { 1066 if (!II->hasMacroDefinition()) 1067 return {}; 1068 1069 MacroState &S = CurSubmoduleState->Macros[II]; 1070 auto *MD = S.getLatest(); 1071 while (MD && isa<VisibilityMacroDirective>(MD)) 1072 MD = MD->getPrevious(); 1073 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD), 1074 S.getActiveModuleMacros(*this, II), 1075 S.isAmbiguous(*this, II)); 1076 } 1077 getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)1078 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II, 1079 SourceLocation Loc) { 1080 if (!II->hadMacroDefinition()) 1081 return {}; 1082 1083 MacroState &S = CurSubmoduleState->Macros[II]; 1084 MacroDirective::DefInfo DI; 1085 if (auto *MD = S.getLatest()) 1086 DI = MD->findDirectiveAtLoc(Loc, getSourceManager()); 1087 // FIXME: Compute the set of active module macros at the specified location. 1088 return MacroDefinition(DI.getDirective(), 1089 S.getActiveModuleMacros(*this, II), 1090 S.isAmbiguous(*this, II)); 1091 } 1092 1093 /// Given an identifier, return its latest non-imported MacroDirective 1094 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd. 
getLocalMacroDirective(const IdentifierInfo * II)1095 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const { 1096 if (!II->hasMacroDefinition()) 1097 return nullptr; 1098 1099 auto *MD = getLocalMacroDirectiveHistory(II); 1100 if (!MD || MD->getDefinition().isUndefined()) 1101 return nullptr; 1102 1103 return MD; 1104 } 1105 getMacroInfo(const IdentifierInfo * II)1106 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const { 1107 return const_cast<Preprocessor*>(this)->getMacroInfo(II); 1108 } 1109 getMacroInfo(const IdentifierInfo * II)1110 MacroInfo *getMacroInfo(const IdentifierInfo *II) { 1111 if (!II->hasMacroDefinition()) 1112 return nullptr; 1113 if (auto MD = getMacroDefinition(II)) 1114 return MD.getMacroInfo(); 1115 return nullptr; 1116 } 1117 1118 /// Given an identifier, return the latest non-imported macro 1119 /// directive for that identifier. 1120 /// 1121 /// One can iterate over all previous macro directives from the most recent 1122 /// one. 1123 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const; 1124 1125 /// Add a directive to the macro directive history for this identifier. 1126 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD); appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)1127 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI, 1128 SourceLocation Loc) { 1129 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc); 1130 appendMacroDirective(II, MD); 1131 return MD; 1132 } appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)1133 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, 1134 MacroInfo *MI) { 1135 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc()); 1136 } 1137 1138 /// Set a MacroDirective that was loaded from a PCH file. 
1139 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED, 1140 MacroDirective *MD); 1141 1142 /// Register an exported macro for a module and identifier. 1143 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, 1144 ArrayRef<ModuleMacro *> Overrides, bool &IsNew); 1145 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II); 1146 1147 /// Get the list of leaf (non-overridden) module macros for a name. getLeafModuleMacros(const IdentifierInfo * II)1148 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const { 1149 if (II->isOutOfDate()) 1150 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II)); 1151 auto I = LeafModuleMacros.find(II); 1152 if (I != LeafModuleMacros.end()) 1153 return I->second; 1154 return None; 1155 } 1156 1157 /// \{ 1158 /// Iterators for the macro history table. Currently defined macros have 1159 /// IdentifierInfo::hasMacroDefinition() set and an empty 1160 /// MacroInfo::getUndefLoc() at the head of the list. 1161 using macro_iterator = MacroMap::const_iterator; 1162 1163 macro_iterator macro_begin(bool IncludeExternalMacros = true) const; 1164 macro_iterator macro_end(bool IncludeExternalMacros = true) const; 1165 1166 llvm::iterator_range<macro_iterator> 1167 macros(bool IncludeExternalMacros = true) const { 1168 macro_iterator begin = macro_begin(IncludeExternalMacros); 1169 macro_iterator end = macro_end(IncludeExternalMacros); 1170 return llvm::make_range(begin, end); 1171 } 1172 1173 /// \} 1174 1175 /// Return the name of the macro defined before \p Loc that has 1176 /// spelling \p Tokens. If there are multiple macros with same spelling, 1177 /// return the last one defined. 1178 StringRef getLastMacroWithSpelling(SourceLocation Loc, 1179 ArrayRef<TokenValue> Tokens) const; 1180 getPredefines()1181 const std::string &getPredefines() const { return Predefines; } 1182 1183 /// Set the predefines for this Preprocessor. 
1184 /// 1185 /// These predefines are automatically injected when parsing the main file. setPredefines(const char * P)1186 void setPredefines(const char *P) { Predefines = P; } setPredefines(StringRef P)1187 void setPredefines(StringRef P) { Predefines = std::string(P); } 1188 1189 /// Return information about the specified preprocessor 1190 /// identifier token. getIdentifierInfo(StringRef Name)1191 IdentifierInfo *getIdentifierInfo(StringRef Name) const { 1192 return &Identifiers.get(Name); 1193 } 1194 1195 /// Add the specified pragma handler to this preprocessor. 1196 /// 1197 /// If \p Namespace is non-null, then it is a token required to exist on the 1198 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". 1199 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler); AddPragmaHandler(PragmaHandler * Handler)1200 void AddPragmaHandler(PragmaHandler *Handler) { 1201 AddPragmaHandler(StringRef(), Handler); 1202 } 1203 1204 /// Remove the specific pragma handler from this preprocessor. 1205 /// 1206 /// If \p Namespace is non-null, then it should be the namespace that 1207 /// \p Handler was added to. It is an error to remove a handler that 1208 /// has not been registered. 1209 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler); RemovePragmaHandler(PragmaHandler * Handler)1210 void RemovePragmaHandler(PragmaHandler *Handler) { 1211 RemovePragmaHandler(StringRef(), Handler); 1212 } 1213 1214 /// Install empty handlers for all pragmas (making them ignored). 1215 void IgnorePragmas(); 1216 1217 /// Add the specified comment handler to the preprocessor. 1218 void addCommentHandler(CommentHandler *Handler); 1219 1220 /// Remove the specified comment handler. 1221 /// 1222 /// It is an error to remove a handler that has not been registered. 1223 void removeCommentHandler(CommentHandler *Handler); 1224 1225 /// Set the code completion handler to the given object. 
setCodeCompletionHandler(CodeCompletionHandler & Handler)1226 void setCodeCompletionHandler(CodeCompletionHandler &Handler) { 1227 CodeComplete = &Handler; 1228 } 1229 1230 /// Retrieve the current code-completion handler. getCodeCompletionHandler()1231 CodeCompletionHandler *getCodeCompletionHandler() const { 1232 return CodeComplete; 1233 } 1234 1235 /// Clear out the code completion handler. clearCodeCompletionHandler()1236 void clearCodeCompletionHandler() { 1237 CodeComplete = nullptr; 1238 } 1239 1240 /// Hook used by the lexer to invoke the "included file" code 1241 /// completion point. 1242 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled); 1243 1244 /// Hook used by the lexer to invoke the "natural language" code 1245 /// completion point. 1246 void CodeCompleteNaturalLanguage(); 1247 1248 /// Set the code completion token for filtering purposes. setCodeCompletionIdentifierInfo(IdentifierInfo * Filter)1249 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) { 1250 CodeCompletionII = Filter; 1251 } 1252 1253 /// Set the code completion token range for detecting replacement range later 1254 /// on. setCodeCompletionTokenRange(const SourceLocation Start,const SourceLocation End)1255 void setCodeCompletionTokenRange(const SourceLocation Start, 1256 const SourceLocation End) { 1257 CodeCompletionTokenRange = {Start, End}; 1258 } getCodeCompletionTokenRange()1259 SourceRange getCodeCompletionTokenRange() const { 1260 return CodeCompletionTokenRange; 1261 } 1262 1263 /// Get the code completion token for filtering purposes. getCodeCompletionFilter()1264 StringRef getCodeCompletionFilter() { 1265 if (CodeCompletionII) 1266 return CodeCompletionII->getName(); 1267 return {}; 1268 } 1269 1270 /// Retrieve the preprocessing record, or NULL if there is no 1271 /// preprocessing record. 
getPreprocessingRecord()1272 PreprocessingRecord *getPreprocessingRecord() const { return Record; } 1273 1274 /// Create a new preprocessing record, which will keep track of 1275 /// all macro expansions, macro definitions, etc. 1276 void createPreprocessingRecord(); 1277 1278 /// Returns true if the FileEntry is the PCH through header. 1279 bool isPCHThroughHeader(const FileEntry *FE); 1280 1281 /// True if creating a PCH with a through header. 1282 bool creatingPCHWithThroughHeader(); 1283 1284 /// True if using a PCH with a through header. 1285 bool usingPCHWithThroughHeader(); 1286 1287 /// True if creating a PCH with a #pragma hdrstop. 1288 bool creatingPCHWithPragmaHdrStop(); 1289 1290 /// True if using a PCH with a #pragma hdrstop. 1291 bool usingPCHWithPragmaHdrStop(); 1292 1293 /// Skip tokens until after the #include of the through header or 1294 /// until after a #pragma hdrstop. 1295 void SkipTokensWhileUsingPCH(); 1296 1297 /// Process directives while skipping until the through header or 1298 /// #pragma hdrstop is found. 1299 void HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1300 SourceLocation HashLoc); 1301 1302 /// Enter the specified FileID as the main source file, 1303 /// which implicitly adds the builtin defines etc. 1304 void EnterMainSourceFile(); 1305 1306 /// Inform the preprocessor callbacks that processing is complete. 1307 void EndSourceFile(); 1308 1309 /// Add a source file to the top of the include stack and 1310 /// start lexing tokens from it instead of the current buffer. 1311 /// 1312 /// Emits a diagnostic, doesn't enter the file, and returns true on error. 1313 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir, 1314 SourceLocation Loc); 1315 1316 /// Add a Macro to the top of the include stack and start lexing 1317 /// tokens from it instead of the current buffer. 1318 /// 1319 /// \param Args specifies the tokens input to a function-like macro. 
1320 /// \param ILEnd specifies the location of the ')' for a function-like macro 1321 /// or the identifier for an object-like macro. 1322 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, 1323 MacroArgs *Args); 1324 1325 private: 1326 /// Add a "macro" context to the top of the include stack, 1327 /// which will cause the lexer to start returning the specified tokens. 1328 /// 1329 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream 1330 /// will not be subject to further macro expansion. Otherwise, these tokens 1331 /// will be re-macro-expanded when/if expansion is enabled. 1332 /// 1333 /// If \p OwnsTokens is false, this method assumes that the specified stream 1334 /// of tokens has a permanent owner somewhere, so they do not need to be 1335 /// copied. If it is true, it assumes the array of tokens is allocated with 1336 /// \c new[] and the Preprocessor will delete[] it. 1337 /// 1338 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag 1339 /// set, see the flag documentation for details. 1340 void EnterTokenStream(const Token *Toks, unsigned NumToks, 1341 bool DisableMacroExpansion, bool OwnsTokens, 1342 bool IsReinject); 1343 1344 public: EnterTokenStream(std::unique_ptr<Token[]> Toks,unsigned NumToks,bool DisableMacroExpansion,bool IsReinject)1345 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks, 1346 bool DisableMacroExpansion, bool IsReinject) { 1347 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true, 1348 IsReinject); 1349 } 1350 EnterTokenStream(ArrayRef<Token> Toks,bool DisableMacroExpansion,bool IsReinject)1351 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion, 1352 bool IsReinject) { 1353 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false, 1354 IsReinject); 1355 } 1356 1357 /// Pop the current lexer/macro exp off the top of the lexer stack. 
1358 /// 1359 /// This should only be used in situations where the current state of the 1360 /// top-of-stack lexer is known. 1361 void RemoveTopOfLexerStack(); 1362 1363 /// From the point that this method is called, and until 1364 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor 1365 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will 1366 /// make the Preprocessor re-lex the same tokens. 1367 /// 1368 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can 1369 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will 1370 /// be combined with the EnableBacktrackAtThisPos calls in reverse order. 1371 /// 1372 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack 1373 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of 1374 /// tokens will continue indefinitely. 1375 /// 1376 void EnableBacktrackAtThisPos(); 1377 1378 /// Disable the last EnableBacktrackAtThisPos call. 1379 void CommitBacktrackedTokens(); 1380 1381 /// Make Preprocessor re-lex the tokens that were lexed since 1382 /// EnableBacktrackAtThisPos() was previously called. 1383 void Backtrack(); 1384 1385 /// True if EnableBacktrackAtThisPos() was called and 1386 /// caching of tokens is on. isBacktrackEnabled()1387 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); } 1388 1389 /// Lex the next token for this preprocessor. 1390 void Lex(Token &Result); 1391 1392 /// Lex a token, forming a header-name token if possible. 
1393 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); 1394 1395 bool LexAfterModuleImport(Token &Result); 1396 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); 1397 1398 void makeModuleVisible(Module *M, SourceLocation Loc); 1399 getModuleImportLoc(Module * M)1400 SourceLocation getModuleImportLoc(Module *M) const { 1401 return CurSubmoduleState->VisibleModules.getImportLoc(M); 1402 } 1403 1404 /// Lex a string literal, which may be the concatenation of multiple 1405 /// string literals and may even come from macro expansion. 1406 /// \returns true on success, false if a error diagnostic has been generated. LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1407 bool LexStringLiteral(Token &Result, std::string &String, 1408 const char *DiagnosticTag, bool AllowMacroExpansion) { 1409 if (AllowMacroExpansion) 1410 Lex(Result); 1411 else 1412 LexUnexpandedToken(Result); 1413 return FinishLexStringLiteral(Result, String, DiagnosticTag, 1414 AllowMacroExpansion); 1415 } 1416 1417 /// Complete the lexing of a string literal where the first token has 1418 /// already been lexed (see LexStringLiteral). 1419 bool FinishLexStringLiteral(Token &Result, std::string &String, 1420 const char *DiagnosticTag, 1421 bool AllowMacroExpansion); 1422 1423 /// Lex a token. If it's a comment, keep lexing until we get 1424 /// something not a comment. 1425 /// 1426 /// This is useful in -E -C mode where comments would foul up preprocessor 1427 /// directive handling. LexNonComment(Token & Result)1428 void LexNonComment(Token &Result) { 1429 do 1430 Lex(Result); 1431 while (Result.getKind() == tok::comment); 1432 } 1433 1434 /// Just like Lex, but disables macro expansion of identifier tokens. LexUnexpandedToken(Token & Result)1435 void LexUnexpandedToken(Token &Result) { 1436 // Disable macro expansion. 1437 bool OldVal = DisableMacroExpansion; 1438 DisableMacroExpansion = true; 1439 // Lex the token. 
1440 Lex(Result); 1441 1442 // Reenable it. 1443 DisableMacroExpansion = OldVal; 1444 } 1445 1446 /// Like LexNonComment, but this disables macro expansion of 1447 /// identifier tokens. LexUnexpandedNonComment(Token & Result)1448 void LexUnexpandedNonComment(Token &Result) { 1449 do 1450 LexUnexpandedToken(Result); 1451 while (Result.getKind() == tok::comment); 1452 } 1453 1454 /// Parses a simple integer literal to get its numeric value. Floating 1455 /// point literals and user defined literals are rejected. Used primarily to 1456 /// handle pragmas that accept integer arguments. 1457 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); 1458 1459 /// Disables macro expansion everywhere except for preprocessor directives. SetMacroExpansionOnlyInDirectives()1460 void SetMacroExpansionOnlyInDirectives() { 1461 DisableMacroExpansion = true; 1462 MacroExpansionInDirectivesOverride = true; 1463 } 1464 1465 /// Peeks ahead N tokens and returns that token without consuming any 1466 /// tokens. 1467 /// 1468 /// LookAhead(0) returns the next token that would be returned by Lex(), 1469 /// LookAhead(1) returns the token after it, etc. This returns normal 1470 /// tokens after phase 5. As such, it is equivalent to using 1471 /// 'Lex', not 'LexUnexpandedToken'. LookAhead(unsigned N)1472 const Token &LookAhead(unsigned N) { 1473 assert(LexLevel == 0 && "cannot use lookahead while lexing"); 1474 if (CachedLexPos + N < CachedTokens.size()) 1475 return CachedTokens[CachedLexPos+N]; 1476 else 1477 return PeekAhead(N+1); 1478 } 1479 1480 /// When backtracking is enabled and tokens are cached, 1481 /// this allows to revert a specific number of tokens. 1482 /// 1483 /// Note that the number of tokens being reverted should be up to the last 1484 /// backtrack position, not more. 
RevertCachedTokens(unsigned N)1485 void RevertCachedTokens(unsigned N) { 1486 assert(isBacktrackEnabled() && 1487 "Should only be called when tokens are cached for backtracking"); 1488 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back()) 1489 && "Should revert tokens up to the last backtrack position, not more"); 1490 assert(signed(CachedLexPos) - signed(N) >= 0 && 1491 "Corrupted backtrack positions ?"); 1492 CachedLexPos -= N; 1493 } 1494 1495 /// Enters a token in the token stream to be lexed next. 1496 /// 1497 /// If BackTrack() is called afterwards, the token will remain at the 1498 /// insertion point. 1499 /// If \p IsReinject is true, resulting token will have Token::IsReinjected 1500 /// flag set. See the flag documentation for details. EnterToken(const Token & Tok,bool IsReinject)1501 void EnterToken(const Token &Tok, bool IsReinject) { 1502 if (LexLevel) { 1503 // It's not correct in general to enter caching lex mode while in the 1504 // middle of a nested lexing action. 1505 auto TokCopy = std::make_unique<Token[]>(1); 1506 TokCopy[0] = Tok; 1507 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); 1508 } else { 1509 EnterCachingLexMode(); 1510 assert(IsReinject && "new tokens in the middle of cached stream"); 1511 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); 1512 } 1513 } 1514 1515 /// We notify the Preprocessor that if it is caching tokens (because 1516 /// backtrack is enabled) it should replace the most recent cached tokens 1517 /// with the given annotation token. This function has no effect if 1518 /// backtracking is not enabled. 1519 /// 1520 /// Note that the use of this function is just for optimization, so that the 1521 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is 1522 /// invoked. 
AnnotateCachedTokens(const Token & Tok)1523 void AnnotateCachedTokens(const Token &Tok) { 1524 assert(Tok.isAnnotation() && "Expected annotation token"); 1525 if (CachedLexPos != 0 && isBacktrackEnabled()) 1526 AnnotatePreviousCachedTokens(Tok); 1527 } 1528 1529 /// Get the location of the last cached token, suitable for setting the end 1530 /// location of an annotation token. getLastCachedTokenLocation()1531 SourceLocation getLastCachedTokenLocation() const { 1532 assert(CachedLexPos != 0); 1533 return CachedTokens[CachedLexPos-1].getLastLoc(); 1534 } 1535 1536 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in 1537 /// CachedTokens. 1538 bool IsPreviousCachedToken(const Token &Tok) const; 1539 1540 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens 1541 /// in \p NewToks. 1542 /// 1543 /// Useful when a token needs to be split in smaller ones and CachedTokens 1544 /// most recent token must to be updated to reflect that. 1545 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks); 1546 1547 /// Replace the last token with an annotation token. 1548 /// 1549 /// Like AnnotateCachedTokens(), this routine replaces an 1550 /// already-parsed (and resolved) token with an annotation 1551 /// token. However, this routine only replaces the last token with 1552 /// the annotation token; it does not affect any other cached 1553 /// tokens. This function has no effect if backtracking is not 1554 /// enabled. ReplaceLastTokenWithAnnotation(const Token & Tok)1555 void ReplaceLastTokenWithAnnotation(const Token &Tok) { 1556 assert(Tok.isAnnotation() && "Expected annotation token"); 1557 if (CachedLexPos != 0 && isBacktrackEnabled()) 1558 CachedTokens[CachedLexPos-1] = Tok; 1559 } 1560 1561 /// Enter an annotation token into the token stream. 
1562 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind, 1563 void *AnnotationVal); 1564 1565 /// Determine whether it's possible for a future call to Lex to produce an 1566 /// annotation token created by a previous call to EnterAnnotationToken. mightHavePendingAnnotationTokens()1567 bool mightHavePendingAnnotationTokens() { 1568 return CurLexerKind != CLK_Lexer; 1569 } 1570 1571 /// Update the current token to represent the provided 1572 /// identifier, in order to cache an action performed by typo correction. TypoCorrectToken(const Token & Tok)1573 void TypoCorrectToken(const Token &Tok) { 1574 assert(Tok.getIdentifierInfo() && "Expected identifier token"); 1575 if (CachedLexPos != 0 && isBacktrackEnabled()) 1576 CachedTokens[CachedLexPos-1] = Tok; 1577 } 1578 1579 /// Recompute the current lexer kind based on the CurLexer/ 1580 /// CurTokenLexer pointers. 1581 void recomputeCurLexerKind(); 1582 1583 /// Returns true if incremental processing is enabled isIncrementalProcessingEnabled()1584 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } 1585 1586 /// Enables the incremental processing 1587 void enableIncrementalProcessing(bool value = true) { 1588 IncrementalProcessing = value; 1589 } 1590 1591 /// Specify the point at which code-completion will be performed. 1592 /// 1593 /// \param File the file in which code completion should occur. If 1594 /// this file is included multiple times, code-completion will 1595 /// perform completion the first time it is included. If NULL, this 1596 /// function clears out the code-completion point. 1597 /// 1598 /// \param Line the line at which code completion should occur 1599 /// (1-based). 1600 /// 1601 /// \param Column the column at which code completion should occur 1602 /// (1-based). 1603 /// 1604 /// \returns true if an error occurred, false otherwise. 
1605 bool SetCodeCompletionPoint(const FileEntry *File, 1606 unsigned Line, unsigned Column); 1607 1608 /// Determine if we are performing code completion. isCodeCompletionEnabled()1609 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; } 1610 1611 /// Returns the location of the code-completion point. 1612 /// 1613 /// Returns an invalid location if code-completion is not enabled or the file 1614 /// containing the code-completion point has not been lexed yet. getCodeCompletionLoc()1615 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; } 1616 1617 /// Returns the start location of the file of code-completion point. 1618 /// 1619 /// Returns an invalid location if code-completion is not enabled or the file 1620 /// containing the code-completion point has not been lexed yet. getCodeCompletionFileLoc()1621 SourceLocation getCodeCompletionFileLoc() const { 1622 return CodeCompletionFileLoc; 1623 } 1624 1625 /// Returns true if code-completion is enabled and we have hit the 1626 /// code-completion point. isCodeCompletionReached()1627 bool isCodeCompletionReached() const { return CodeCompletionReached; } 1628 1629 /// Note that we hit the code-completion point. setCodeCompletionReached()1630 void setCodeCompletionReached() { 1631 assert(isCodeCompletionEnabled() && "Code-completion not enabled!"); 1632 CodeCompletionReached = true; 1633 // Silence any diagnostics that occur after we hit the code-completion. 1634 getDiagnostics().setSuppressAllDiagnostics(true); 1635 } 1636 1637 /// The location of the currently-active \#pragma clang 1638 /// arc_cf_code_audited begin. 1639 /// 1640 /// Returns an invalid location if there is no such pragma active. 1641 std::pair<IdentifierInfo *, SourceLocation> getPragmaARCCFCodeAuditedInfo()1642 getPragmaARCCFCodeAuditedInfo() const { 1643 return PragmaARCCFCodeAuditedInfo; 1644 } 1645 1646 /// Set the location of the currently-active \#pragma clang 1647 /// arc_cf_code_audited begin. 
An invalid location ends the pragma. setPragmaARCCFCodeAuditedInfo(IdentifierInfo * Ident,SourceLocation Loc)1648 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, 1649 SourceLocation Loc) { 1650 PragmaARCCFCodeAuditedInfo = {Ident, Loc}; 1651 } 1652 1653 /// The location of the currently-active \#pragma clang 1654 /// assume_nonnull begin. 1655 /// 1656 /// Returns an invalid location if there is no such pragma active. getPragmaAssumeNonNullLoc()1657 SourceLocation getPragmaAssumeNonNullLoc() const { 1658 return PragmaAssumeNonNullLoc; 1659 } 1660 1661 /// Set the location of the currently-active \#pragma clang 1662 /// assume_nonnull begin. An invalid location ends the pragma. setPragmaAssumeNonNullLoc(SourceLocation Loc)1663 void setPragmaAssumeNonNullLoc(SourceLocation Loc) { 1664 PragmaAssumeNonNullLoc = Loc; 1665 } 1666 1667 /// Set the directory in which the main file should be considered 1668 /// to have been found, if it is not a real file. setMainFileDir(const DirectoryEntry * Dir)1669 void setMainFileDir(const DirectoryEntry *Dir) { 1670 MainFileDir = Dir; 1671 } 1672 1673 /// Instruct the preprocessor to skip part of the main source file. 1674 /// 1675 /// \param Bytes The number of bytes in the preamble to skip. 1676 /// 1677 /// \param StartOfLine Whether skipping these bytes puts the lexer at the 1678 /// start of a line. setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1679 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) { 1680 SkipMainFilePreamble.first = Bytes; 1681 SkipMainFilePreamble.second = StartOfLine; 1682 } 1683 1684 /// Forwarding function for diagnostics. This emits a diagnostic at 1685 /// the specified Token's location, translating the token's start 1686 /// position in the current buffer into a SourcePosition object for rendering. 
Diag(SourceLocation Loc,unsigned DiagID)1687 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const { 1688 return Diags->Report(Loc, DiagID); 1689 } 1690 Diag(const Token & Tok,unsigned DiagID)1691 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const { 1692 return Diags->Report(Tok.getLocation(), DiagID); 1693 } 1694 1695 /// Return the 'spelling' of the token at the given 1696 /// location; does not go up to the spelling location or down to the 1697 /// expansion location. 1698 /// 1699 /// \param buffer A buffer which will be used only if the token requires 1700 /// "cleaning", e.g. if it contains trigraphs or escaped newlines 1701 /// \param invalid If non-null, will be set \c true if an error occurs. 1702 StringRef getSpelling(SourceLocation loc, 1703 SmallVectorImpl<char> &buffer, 1704 bool *invalid = nullptr) const { 1705 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid); 1706 } 1707 1708 /// Return the 'spelling' of the Tok token. 1709 /// 1710 /// The spelling of a token is the characters used to represent the token in 1711 /// the source file after trigraph expansion and escaped-newline folding. In 1712 /// particular, this wants to get the true, uncanonicalized, spelling of 1713 /// things like digraphs, UCNs, etc. 1714 /// 1715 /// \param Invalid If non-null, will be set \c true if an error occurs. 1716 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const { 1717 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid); 1718 } 1719 1720 /// Get the spelling of a token into a preallocated buffer, instead 1721 /// of as an std::string. 1722 /// 1723 /// The caller is required to allocate enough space for the token, which is 1724 /// guaranteed to be at least Tok.getLength() bytes long. The length of the 1725 /// actual result is returned. 
1726 /// 1727 /// Note that this method may do two possible things: it may either fill in 1728 /// the buffer specified with characters, or it may *change the input pointer* 1729 /// to point to a constant buffer with the data already in it (avoiding a 1730 /// copy). The caller is not allowed to modify the returned buffer pointer 1731 /// if an internal buffer is returned. 1732 unsigned getSpelling(const Token &Tok, const char *&Buffer, 1733 bool *Invalid = nullptr) const { 1734 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid); 1735 } 1736 1737 /// Get the spelling of a token into a SmallVector. 1738 /// 1739 /// Note that the returned StringRef may not point to the 1740 /// supplied buffer if a copy can be avoided. 1741 StringRef getSpelling(const Token &Tok, 1742 SmallVectorImpl<char> &Buffer, 1743 bool *Invalid = nullptr) const; 1744 1745 /// Relex the token at the specified location. 1746 /// \returns true if there was a failure, false on success. 1747 bool getRawToken(SourceLocation Loc, Token &Result, 1748 bool IgnoreWhiteSpace = false) { 1749 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace); 1750 } 1751 1752 /// Given a Token \p Tok that is a numeric constant with length 1, 1753 /// return the character. 1754 char 1755 getSpellingOfSingleCharacterNumericConstant(const Token &Tok, 1756 bool *Invalid = nullptr) const { 1757 assert(Tok.is(tok::numeric_constant) && 1758 Tok.getLength() == 1 && "Called on unsupported token"); 1759 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); 1760 1761 // If the token is carrying a literal data pointer, just use it. 1762 if (const char *D = Tok.getLiteralData()) 1763 return *D; 1764 1765 // Otherwise, fall back on getCharacterData, which is slower, but always 1766 // works. 1767 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid); 1768 } 1769 1770 /// Retrieve the name of the immediate macro expansion. 
1771 /// 1772 /// This routine starts from a source location, and finds the name of the 1773 /// macro responsible for its immediate expansion. It looks through any 1774 /// intervening macro argument expansions to compute this. It returns a 1775 /// StringRef that refers to the SourceManager-owned buffer of the source 1776 /// where that macro name is spelled. Thus, the result shouldn't out-live 1777 /// the SourceManager. getImmediateMacroName(SourceLocation Loc)1778 StringRef getImmediateMacroName(SourceLocation Loc) { 1779 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts()); 1780 } 1781 1782 /// Plop the specified string into a scratch buffer and set the 1783 /// specified token's location and length to it. 1784 /// 1785 /// If specified, the source location provides a location of the expansion 1786 /// point of the token. 1787 void CreateString(StringRef Str, Token &Tok, 1788 SourceLocation ExpansionLocStart = SourceLocation(), 1789 SourceLocation ExpansionLocEnd = SourceLocation()); 1790 1791 /// Split the first Length characters out of the token starting at TokLoc 1792 /// and return a location pointing to the split token. Re-lexing from the 1793 /// split token will return the split token rather than the original. 1794 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length); 1795 1796 /// Computes the source location just past the end of the 1797 /// token at this source location. 1798 /// 1799 /// This routine can be used to produce a source location that 1800 /// points just past the end of the token referenced by \p Loc, and 1801 /// is generally used when a diagnostic needs to point just after a 1802 /// token where it expected something different that it received. If 1803 /// the returned source location would not be meaningful (e.g., if 1804 /// it points into a macro), this routine returns an invalid 1805 /// source location. 
1806 /// 1807 /// \param Offset an offset from the end of the token, where the source 1808 /// location should refer to. The default offset (0) produces a source 1809 /// location pointing just past the end of the token; an offset of 1 produces 1810 /// a source location pointing to the last character in the token, etc. 1811 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { 1812 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts); 1813 } 1814 1815 /// Returns true if the given MacroID location points at the first 1816 /// token of the macro expansion. 1817 /// 1818 /// \param MacroBegin If non-null and function returns true, it is set to 1819 /// begin location of the macro. 1820 bool isAtStartOfMacroExpansion(SourceLocation loc, 1821 SourceLocation *MacroBegin = nullptr) const { 1822 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts, 1823 MacroBegin); 1824 } 1825 1826 /// Returns true if the given MacroID location points at the last 1827 /// token of the macro expansion. 1828 /// 1829 /// \param MacroEnd If non-null and function returns true, it is set to 1830 /// end location of the macro. 1831 bool isAtEndOfMacroExpansion(SourceLocation loc, 1832 SourceLocation *MacroEnd = nullptr) const { 1833 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd); 1834 } 1835 1836 /// Print the token to stderr, used for debugging. 1837 void DumpToken(const Token &Tok, bool DumpFlags = false) const; 1838 void DumpLocation(SourceLocation Loc) const; 1839 void DumpMacro(const MacroInfo &MI) const; 1840 void dumpMacroInfo(const IdentifierInfo *II); 1841 1842 /// Given a location that specifies the start of a 1843 /// token, return a new location that specifies a character within the token. 
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1844 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, 1845 unsigned Char) const { 1846 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts); 1847 } 1848 1849 /// Increment the counters for the number of token paste operations 1850 /// performed. 1851 /// 1852 /// If fast was specified, this is a 'fast paste' case we handled. IncrementPasteCounter(bool isFast)1853 void IncrementPasteCounter(bool isFast) { 1854 if (isFast) 1855 ++NumFastTokenPaste; 1856 else 1857 ++NumTokenPaste; 1858 } 1859 1860 void PrintStats(); 1861 1862 size_t getTotalMemory() const; 1863 1864 /// When the macro expander pastes together a comment (/##/) in Microsoft 1865 /// mode, this method handles updating the current state, returning the 1866 /// token on the next source line. 1867 void HandleMicrosoftCommentPaste(Token &Tok); 1868 1869 //===--------------------------------------------------------------------===// 1870 // Preprocessor callback methods. These are invoked by a lexer as various 1871 // directives and events are found. 1872 1873 /// Given a tok::raw_identifier token, look up the 1874 /// identifier information for the token and install it into the token, 1875 /// updating the token kind accordingly. 1876 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const; 1877 1878 private: 1879 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons; 1880 1881 public: 1882 /// Specifies the reason for poisoning an identifier. 1883 /// 1884 /// If that identifier is accessed while poisoned, then this reason will be 1885 /// used instead of the default "poisoned" diagnostic. 1886 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID); 1887 1888 /// Display reason for poisoned identifier. 
1889 void HandlePoisonedIdentifier(Token & Identifier); 1890 MaybeHandlePoisonedIdentifier(Token & Identifier)1891 void MaybeHandlePoisonedIdentifier(Token & Identifier) { 1892 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) { 1893 if(II->isPoisoned()) { 1894 HandlePoisonedIdentifier(Identifier); 1895 } 1896 } 1897 } 1898 1899 private: 1900 /// Identifiers used for SEH handling in Borland. These are only 1901 /// allowed in particular circumstances 1902 // __except block 1903 IdentifierInfo *Ident__exception_code, 1904 *Ident___exception_code, 1905 *Ident_GetExceptionCode; 1906 // __except filter expression 1907 IdentifierInfo *Ident__exception_info, 1908 *Ident___exception_info, 1909 *Ident_GetExceptionInfo; 1910 // __finally 1911 IdentifierInfo *Ident__abnormal_termination, 1912 *Ident___abnormal_termination, 1913 *Ident_AbnormalTermination; 1914 1915 const char *getCurLexerEndPos(); 1916 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod); 1917 1918 public: 1919 void PoisonSEHIdentifiers(bool Poison = true); // Borland 1920 1921 /// Callback invoked when the lexer reads an identifier and has 1922 /// filled in the tokens IdentifierInfo member. 1923 /// 1924 /// This callback potentially macro expands it or turns it into a named 1925 /// token (like 'for'). 1926 /// 1927 /// \returns true if we actually computed a token, false if we need to 1928 /// lex again. 1929 bool HandleIdentifier(Token &Identifier); 1930 1931 /// Callback invoked when the lexer hits the end of the current file. 1932 /// 1933 /// This either returns the EOF token and returns true, or 1934 /// pops a level off the include stack and returns false, at which point the 1935 /// client should call lex again. 1936 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); 1937 1938 /// Callback invoked when the current TokenLexer hits the end of its 1939 /// token stream. 
1940 bool HandleEndOfTokenLexer(Token &Result); 1941 1942 /// Callback invoked when the lexer sees a # token at the start of a 1943 /// line. 1944 /// 1945 /// This consumes the directive, modifies the lexer/preprocessor state, and 1946 /// advances the lexer(s) so that the next token read is the correct one. 1947 void HandleDirective(Token &Result); 1948 1949 /// Ensure that the next token is a tok::eod token. 1950 /// 1951 /// If not, emit a diagnostic and consume up until the eod. 1952 /// If \p EnableMacros is true, then we consider macros that expand to zero 1953 /// tokens as being ok. 1954 /// 1955 /// \return The location of the end of the directive (the terminating 1956 /// newline). 1957 SourceLocation CheckEndOfDirective(const char *DirType, 1958 bool EnableMacros = false); 1959 1960 /// Read and discard all tokens remaining on the current line until 1961 /// the tok::eod token is found. Returns the range of the skipped tokens. 1962 SourceRange DiscardUntilEndOfDirective(); 1963 1964 /// Returns true if the preprocessor has seen a use of 1965 /// __DATE__ or __TIME__ in the file so far. SawDateOrTime()1966 bool SawDateOrTime() const { 1967 return DATELoc != SourceLocation() || TIMELoc != SourceLocation(); 1968 } getCounterValue()1969 unsigned getCounterValue() const { return CounterValue; } setCounterValue(unsigned V)1970 void setCounterValue(unsigned V) { CounterValue = V; } 1971 1972 /// Retrieves the module that we're currently building, if any. 1973 Module *getCurrentModule(); 1974 1975 /// Allocate a new MacroInfo object with the provided SourceLocation. 1976 MacroInfo *AllocateMacroInfo(SourceLocation L); 1977 1978 /// Turn the specified lexer token into a fully checked and spelled 1979 /// filename, e.g. as an operand of \#include. 
1980 /// 1981 /// The caller is expected to provide a buffer that is large enough to hold 1982 /// the spelling of the filename, but is also expected to handle the case 1983 /// when this method decides to use a different buffer. 1984 /// 1985 /// \returns true if the input filename was in <>'s or false if it was 1986 /// in ""'s. 1987 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer); 1988 1989 /// Given a "foo" or \<foo> reference, look up the indicated file. 1990 /// 1991 /// Returns None on failure. \p isAngled indicates whether the file 1992 /// reference is for system \#include's or not (i.e. using <> instead of ""). 1993 Optional<FileEntryRef> 1994 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 1995 const DirectoryLookup *FromDir, const FileEntry *FromFile, 1996 const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath, 1997 SmallVectorImpl<char> *RelativePath, 1998 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 1999 bool *IsFrameworkFound, bool SkipCache = false); 2000 2001 /// Get the DirectoryLookup structure used to find the current 2002 /// FileEntry, if CurLexer is non-null and if applicable. 2003 /// 2004 /// This allows us to implement \#include_next and find directory-specific 2005 /// properties. GetCurDirLookup()2006 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; } 2007 2008 /// Return true if we're in the top-level file, not in a \#include. 2009 bool isInPrimaryFile() const; 2010 2011 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is 2012 /// followed by EOD. Return true if the token is not a valid on-off-switch. 
2013 bool LexOnOffSwitch(tok::OnOffSwitch &Result); 2014 2015 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 2016 bool *ShadowFlag = nullptr); 2017 2018 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma); 2019 Module *LeaveSubmodule(bool ForPragma); 2020 2021 private: 2022 friend void TokenLexer::ExpandFunctionArguments(); 2023 PushIncludeMacroStack()2024 void PushIncludeMacroStack() { 2025 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); 2026 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, 2027 std::move(CurLexer), CurPPLexer, 2028 std::move(CurTokenLexer), CurDirLookup); 2029 CurPPLexer = nullptr; 2030 } 2031 PopIncludeMacroStack()2032 void PopIncludeMacroStack() { 2033 CurLexer = std::move(IncludeMacroStack.back().TheLexer); 2034 CurPPLexer = IncludeMacroStack.back().ThePPLexer; 2035 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); 2036 CurDirLookup = IncludeMacroStack.back().TheDirLookup; 2037 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; 2038 CurLexerKind = IncludeMacroStack.back().CurLexerKind; 2039 IncludeMacroStack.pop_back(); 2040 } 2041 2042 void PropagateLineStartLeadingSpaceInfo(Token &Result); 2043 2044 /// Determine whether we need to create module macros for #defines in the 2045 /// current context. 2046 bool needModuleMacros() const; 2047 2048 /// Update the set of active module macros and ambiguity flag for a module 2049 /// macro name. 2050 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info); 2051 2052 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI, 2053 SourceLocation Loc); 2054 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc); 2055 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc, 2056 bool isPublic); 2057 2058 /// Lex and validate a macro name, which occurs after a 2059 /// \#define or \#undef. 
2060 /// 2061 /// \param MacroNameTok Token that represents the name defined or undefined. 2062 /// \param IsDefineUndef Kind if preprocessor directive. 2063 /// \param ShadowFlag Points to flag that is set if macro name shadows 2064 /// a keyword. 2065 /// 2066 /// This emits a diagnostic, sets the token kind to eod, 2067 /// and discards the rest of the macro line if the macro name is invalid. 2068 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other, 2069 bool *ShadowFlag = nullptr); 2070 2071 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2072 /// entire line) of the macro's tokens and adds them to MacroInfo, and while 2073 /// doing so performs certain validity checks including (but not limited to): 2074 /// - # (stringization) is followed by a macro parameter 2075 /// \param MacroNameTok - Token that represents the macro name 2076 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard 2077 /// 2078 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and 2079 /// returns a nullptr if an invalid sequence of tokens is encountered. 2080 MacroInfo *ReadOptionalMacroParameterListAndBody( 2081 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard); 2082 2083 /// The ( starting an argument list of a macro definition has just been read. 2084 /// Lex the rest of the parameters and the closing ), updating \p MI with 2085 /// what we learn and saving in \p LastTok the last token read. 2086 /// Return true if an error occurs parsing the arg list. 2087 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok); 2088 2089 /// We just read a \#if or related directive and decided that the 2090 /// subsequent tokens are in the \#if'd out portion of the 2091 /// file. Lex the rest of the file, until we see an \#endif. If \p 2092 /// FoundNonSkipPortion is true, then we have already emitted code for part of 2093 /// this \#if directive, so \#else/\#elif blocks should never be entered. 
If 2094 /// \p FoundElse is false, then \#else directives are ok, if not, then we have 2095 /// already seen one so a \#else directive is a duplicate. When this returns, 2096 /// the caller can lex the first valid token. 2097 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc, 2098 SourceLocation IfTokenLoc, 2099 bool FoundNonSkipPortion, bool FoundElse, 2100 SourceLocation ElseLoc = SourceLocation()); 2101 2102 /// Information about the result for evaluating an expression for a 2103 /// preprocessor directive. 2104 struct DirectiveEvalResult { 2105 /// Whether the expression was evaluated as true or not. 2106 bool Conditional; 2107 2108 /// True if the expression contained identifiers that were undefined. 2109 bool IncludedUndefinedIds; 2110 2111 /// The source range for the expression. 2112 SourceRange ExprRange; 2113 }; 2114 2115 /// Evaluate an integer constant expression that may occur after a 2116 /// \#if or \#elif directive and return a \p DirectiveEvalResult object. 2117 /// 2118 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 2119 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); 2120 2121 /// Install the standard preprocessor pragmas: 2122 /// \#pragma GCC poison/system_header/dependency and \#pragma once. 2123 void RegisterBuiltinPragmas(); 2124 2125 /// Register builtin macros such as __LINE__ with the identifier table. 2126 void RegisterBuiltinMacros(); 2127 2128 /// If an identifier token is read that is to be expanded as a macro, handle 2129 /// it and return the next token as 'Tok'. If we lexed a token, return true; 2130 /// otherwise the caller should lex again. 2131 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD); 2132 2133 /// Cache macro expanded tokens for TokenLexers. 
2134 // 2135 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is 2136 /// going to lex in the cache and when it finishes the tokens are removed 2137 /// from the end of the cache. 2138 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer, 2139 ArrayRef<Token> tokens); 2140 2141 void removeCachedMacroExpandedTokensOfLastLexer(); 2142 2143 /// Determine whether the next preprocessor token to be 2144 /// lexed is a '('. If so, consume the token and return true, if not, this 2145 /// method should have no observable side-effect on the lexed tokens. 2146 bool isNextPPTokenLParen(); 2147 2148 /// After reading "MACRO(", this method is invoked to read all of the formal 2149 /// arguments specified for the macro invocation. Returns null on error. 2150 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, 2151 SourceLocation &MacroEnd); 2152 2153 /// If an identifier token is read that is to be expanded 2154 /// as a builtin macro, handle it and return the next token as 'Tok'. 2155 void ExpandBuiltinMacro(Token &Tok); 2156 2157 /// Read a \c _Pragma directive, slice it up, process it, then 2158 /// return the first token after the directive. 2159 /// This assumes that the \c _Pragma token has just been read into \p Tok. 2160 void Handle_Pragma(Token &Tok); 2161 2162 /// Like Handle_Pragma except the pragma text is not enclosed within 2163 /// a string literal. 2164 void HandleMicrosoft__pragma(Token &Tok); 2165 2166 /// Add a lexer to the top of the include stack and 2167 /// start lexing tokens from it instead of the current buffer. 2168 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); 2169 2170 /// Set the FileID for the preprocessor predefines. setPredefinesFileID(FileID FID)2171 void setPredefinesFileID(FileID FID) { 2172 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!"); 2173 PredefinesFileID = FID; 2174 } 2175 2176 /// Set the FileID for the PCH through header. 
2177 void setPCHThroughHeaderFileID(FileID FID); 2178 2179 /// Returns true if we are lexing from a file and not a 2180 /// pragma or a macro. IsFileLexer(const Lexer * L,const PreprocessorLexer * P)2181 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) { 2182 return L ? !L->isPragmaLexer() : P != nullptr; 2183 } 2184 IsFileLexer(const IncludeStackInfo & I)2185 static bool IsFileLexer(const IncludeStackInfo& I) { 2186 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer); 2187 } 2188 IsFileLexer()2189 bool IsFileLexer() const { 2190 return IsFileLexer(CurLexer.get(), CurPPLexer); 2191 } 2192 2193 //===--------------------------------------------------------------------===// 2194 // Caching stuff. 2195 void CachingLex(Token &Result); 2196 InCachingLexMode()2197 bool InCachingLexMode() const { 2198 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means 2199 // that we are past EOF, not that we are in CachingLex mode. 2200 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); 2201 } 2202 2203 void EnterCachingLexMode(); 2204 void EnterCachingLexModeUnchecked(); 2205 ExitCachingLexMode()2206 void ExitCachingLexMode() { 2207 if (InCachingLexMode()) 2208 RemoveTopOfLexerStack(); 2209 } 2210 2211 const Token &PeekAhead(unsigned N); 2212 void AnnotatePreviousCachedTokens(const Token &Tok); 2213 2214 //===--------------------------------------------------------------------===// 2215 /// Handle*Directive - implement the various preprocessor directives. These 2216 /// should side-effect the current preprocessor object so that the next call 2217 /// to Lex() will return the appropriate token next. 
2218 void HandleLineDirective(); 2219 void HandleDigitDirective(Token &Tok); 2220 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning); 2221 void HandleIdentSCCSDirective(Token &Tok); 2222 void HandleMacroPublicDirective(Token &Tok); 2223 void HandleMacroPrivateDirective(); 2224 2225 /// An additional notification that can be produced by a header inclusion or 2226 /// import to tell the parser what happened. 2227 struct ImportAction { 2228 enum ActionKind { 2229 None, 2230 ModuleBegin, 2231 ModuleImport, 2232 SkippedModuleImport, 2233 Failure, 2234 } Kind; 2235 Module *ModuleForHeader = nullptr; 2236 2237 ImportAction(ActionKind AK, Module *Mod = nullptr) KindImportAction2238 : Kind(AK), ModuleForHeader(Mod) { 2239 assert((AK == None || Mod || AK == Failure) && 2240 "no module for module action"); 2241 } 2242 }; 2243 2244 Optional<FileEntryRef> LookupHeaderIncludeOrImport( 2245 const DirectoryLookup *&CurDir, StringRef &Filename, 2246 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2247 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2248 bool &IsMapped, const DirectoryLookup *LookupFrom, 2249 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2250 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2251 ModuleMap::KnownHeader &SuggestedModule, bool isAngled); 2252 2253 // File inclusion. 
2254 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, 2255 const DirectoryLookup *LookupFrom = nullptr, 2256 const FileEntry *LookupFromFile = nullptr); 2257 ImportAction 2258 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok, 2259 Token &FilenameTok, SourceLocation EndLoc, 2260 const DirectoryLookup *LookupFrom = nullptr, 2261 const FileEntry *LookupFromFile = nullptr); 2262 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok); 2263 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok); 2264 void HandleImportDirective(SourceLocation HashLoc, Token &Tok); 2265 void HandleMicrosoftImportDirective(Token &Tok); 2266 2267 public: 2268 /// Check that the given module is available, producing a diagnostic if not. 2269 /// \return \c true if the check failed (because the module is not available). 2270 /// \c false if the module appears to be usable. 2271 static bool checkModuleIsAvailable(const LangOptions &LangOpts, 2272 const TargetInfo &TargetInfo, 2273 DiagnosticsEngine &Diags, Module *M); 2274 2275 // Module inclusion testing. 2276 /// Find the module that owns the source or header file that 2277 /// \p Loc points to. If the location is in a file that was included 2278 /// into a module, or is outside any module, returns nullptr. 2279 Module *getModuleForLocation(SourceLocation Loc); 2280 2281 /// We want to produce a diagnostic at location IncLoc concerning an 2282 /// unreachable effect at location MLoc (eg, where a desired entity was 2283 /// declared or defined). Determine whether the right way to make MLoc 2284 /// reachable is by #include, and if so, what header should be included. 2285 /// 2286 /// This is not necessarily fast, and might load unexpected module maps, so 2287 /// should only be called by code that intends to produce an error. 2288 /// 2289 /// \param IncLoc The location at which the missing effect was detected. 
2290 /// \param MLoc A location within an unimported module at which the desired 2291 /// effect occurred. 2292 /// \return A file that can be #included to provide the desired effect. Null 2293 /// if no such file could be determined or if a #include is not 2294 /// appropriate (eg, if a module should be imported instead). 2295 const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc, 2296 SourceLocation MLoc); 2297 isRecordingPreamble()2298 bool isRecordingPreamble() const { 2299 return PreambleConditionalStack.isRecording(); 2300 } 2301 hasRecordedPreamble()2302 bool hasRecordedPreamble() const { 2303 return PreambleConditionalStack.hasRecordedPreamble(); 2304 } 2305 getPreambleConditionalStack()2306 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const { 2307 return PreambleConditionalStack.getStack(); 2308 } 2309 setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s)2310 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) { 2311 PreambleConditionalStack.setStack(s); 2312 } 2313 setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,llvm::Optional<PreambleSkipInfo> SkipInfo)2314 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s, 2315 llvm::Optional<PreambleSkipInfo> SkipInfo) { 2316 PreambleConditionalStack.startReplaying(); 2317 PreambleConditionalStack.setStack(s); 2318 PreambleConditionalStack.SkipInfo = SkipInfo; 2319 } 2320 getPreambleSkipInfo()2321 llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const { 2322 return PreambleConditionalStack.SkipInfo; 2323 } 2324 2325 private: 2326 /// After processing predefined file, initialize the conditional stack from 2327 /// the preamble. 2328 void replayPreambleConditionalStack(); 2329 2330 // Macro handling. 2331 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard); 2332 void HandleUndefDirective(); 2333 2334 // Conditional Inclusion. 
2335 void HandleIfdefDirective(Token &Result, const Token &HashToken, 2336 bool isIfndef, bool ReadAnyTokensBeforeDirective); 2337 void HandleIfDirective(Token &IfToken, const Token &HashToken, 2338 bool ReadAnyTokensBeforeDirective); 2339 void HandleEndifDirective(Token &EndifToken); 2340 void HandleElseDirective(Token &Result, const Token &HashToken); 2341 void HandleElifDirective(Token &ElifToken, const Token &HashToken); 2342 2343 // Pragmas. 2344 void HandlePragmaDirective(PragmaIntroducer Introducer); 2345 2346 public: 2347 void HandlePragmaOnce(Token &OnceTok); 2348 void HandlePragmaMark(); 2349 void HandlePragmaPoison(); 2350 void HandlePragmaSystemHeader(Token &SysHeaderTok); 2351 void HandlePragmaDependency(Token &DependencyTok); 2352 void HandlePragmaPushMacro(Token &Tok); 2353 void HandlePragmaPopMacro(Token &Tok); 2354 void HandlePragmaIncludeAlias(Token &Tok); 2355 void HandlePragmaModuleBuild(Token &Tok); 2356 void HandlePragmaHdrstop(Token &Tok); 2357 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok); 2358 2359 // Return true and store the first token only if any CommentHandler 2360 // has inserted some tokens and getCommentRetentionState() is false. 2361 bool HandleComment(Token &result, SourceRange Comment); 2362 2363 /// A macro is used, update information about macros that need unused 2364 /// warnings. 2365 void markMacroAsUsed(MacroInfo *MI); 2366 2367 private: 2368 Optional<unsigned> 2369 getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); 2370 2371 /// Contains the currently active skipped range mappings for skipping excluded 2372 /// conditional directives. 2373 ExcludedPreprocessorDirectiveSkipMapping 2374 *ExcludedConditionalDirectiveSkipMappings; 2375 }; 2376 2377 /// Abstract base class that describes a handler that will receive 2378 /// source ranges for each of the comments encountered in the source file. 
2379 class CommentHandler { 2380 public: 2381 virtual ~CommentHandler(); 2382 2383 // The handler shall return true if it has pushed any tokens 2384 // to be read using e.g. EnterToken or EnterTokenStream. 2385 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; 2386 }; 2387 2388 /// Registry of pragma handlers added by plugins 2389 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; 2390 2391 } // namespace clang 2392 2393 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H 2394