xref: /openbsd/gnu/llvm/clang/lib/Lex/PPDirectives.cpp (revision 12c85518)
1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/IdentifierTable.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/Module.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Lex/CodeCompletionHandler.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/LexDiagnostic.h"
25 #include "clang/Lex/LiteralSupport.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/ModuleLoader.h"
28 #include "clang/Lex/ModuleMap.h"
29 #include "clang/Lex/PPCallbacks.h"
30 #include "clang/Lex/Pragma.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Lex/PreprocessorOptions.h"
33 #include "clang/Lex/Token.h"
34 #include "clang/Lex/VariadicMacroSupport.h"
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include "llvm/ADT/ScopeExit.h"
38 #include "llvm/ADT/SmallString.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/ADT/StringSwitch.h"
42 #include "llvm/Support/AlignOf.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/SaveAndRestore.h"
46 #include <algorithm>
47 #include <cassert>
48 #include <cstring>
49 #include <new>
50 #include <optional>
51 #include <string>
52 #include <utility>
53 
54 using namespace clang;
55 
56 //===----------------------------------------------------------------------===//
57 // Utility Methods for Preprocessor Directive Handling.
58 //===----------------------------------------------------------------------===//
59 
AllocateMacroInfo(SourceLocation L)60 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
61   static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
62   return new (BP) MacroInfo(L);
63 }
64 
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)65 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
66                                                            SourceLocation Loc) {
67   return new (BP) DefMacroDirective(MI, Loc);
68 }
69 
70 UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)71 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
72   return new (BP) UndefMacroDirective(UndefLoc);
73 }
74 
75 VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)76 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
77                                                bool isPublic) {
78   return new (BP) VisibilityMacroDirective(Loc, isPublic);
79 }
80 
81 /// Read and discard all tokens remaining on the current line until
82 /// the tok::eod token is found.
DiscardUntilEndOfDirective()83 SourceRange Preprocessor::DiscardUntilEndOfDirective() {
84   Token Tmp;
85   SourceRange Res;
86 
87   LexUnexpandedToken(Tmp);
88   Res.setBegin(Tmp.getLocation());
89   while (Tmp.isNot(tok::eod)) {
90     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
91     LexUnexpandedToken(Tmp);
92   }
93   Res.setEnd(Tmp.getLocation());
94   return Res;
95 }
96 
/// Possible outcomes of checking whether a \#define or \#undef names a
/// reserved identifier or a keyword.
enum MacroDiag {
  /// Not a reserved identifier; nothing to diagnose.
  MD_NoWarn = 0,
  /// The macro hides a keyword; this diagnostic is enabled by default.
  MD_KeywordDef = 1,
  /// \#define or \#undef of a reserved identifier; this diagnostic is disabled
  /// by default.
  MD_ReservedMacro = 2
};
103 
/// Index values streamed into the %select of the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics to name the offending directive.
enum PPElifDiag {
  PED_Elif = 0,    ///< The directive is \#elif.
  PED_Elifdef = 1, ///< The directive is \#elifdef.
  PED_Elifndef = 2 ///< The directive is \#elifndef.
};
111 
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)112 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
113   const LangOptions &Lang = PP.getLangOpts();
114   if (isReservedInAllContexts(II->isReserved(Lang))) {
115     // list from:
116     // - https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117     // - https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118     // - man 7 feature_test_macros
119     // The list must be sorted for correct binary search.
120     static constexpr StringRef ReservedMacro[] = {
121         "_ATFILE_SOURCE",
122         "_BSD_SOURCE",
123         "_CRT_NONSTDC_NO_WARNINGS",
124         "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125         "_CRT_SECURE_NO_WARNINGS",
126         "_FILE_OFFSET_BITS",
127         "_FORTIFY_SOURCE",
128         "_GLIBCXX_ASSERTIONS",
129         "_GLIBCXX_CONCEPT_CHECKS",
130         "_GLIBCXX_DEBUG",
131         "_GLIBCXX_DEBUG_PEDANTIC",
132         "_GLIBCXX_PARALLEL",
133         "_GLIBCXX_PARALLEL_ASSERTIONS",
134         "_GLIBCXX_SANITIZE_VECTOR",
135         "_GLIBCXX_USE_CXX11_ABI",
136         "_GLIBCXX_USE_DEPRECATED",
137         "_GNU_SOURCE",
138         "_ISOC11_SOURCE",
139         "_ISOC95_SOURCE",
140         "_ISOC99_SOURCE",
141         "_LARGEFILE64_SOURCE",
142         "_POSIX_C_SOURCE",
143         "_REENTRANT",
144         "_SVID_SOURCE",
145         "_THREAD_SAFE",
146         "_XOPEN_SOURCE",
147         "_XOPEN_SOURCE_EXTENDED",
148         "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149         "__STDC_FORMAT_MACROS",
150     };
151     if (std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
152                            II->getName()))
153       return MD_NoWarn;
154 
155     return MD_ReservedMacro;
156   }
157   StringRef Text = II->getName();
158   if (II->isKeyword(Lang))
159     return MD_KeywordDef;
160   if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final")))
161     return MD_KeywordDef;
162   return MD_NoWarn;
163 }
164 
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)165 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
166   const LangOptions &Lang = PP.getLangOpts();
167   // Do not warn on keyword undef.  It is generally harmless and widely used.
168   if (isReservedInAllContexts(II->isReserved(Lang)))
169     return MD_ReservedMacro;
170   return MD_NoWarn;
171 }
172 
173 // Return true if we want to issue a diagnostic by default if we
174 // encounter this name in a #include with the wrong case. For now,
175 // this includes the standard C and C++ headers, Posix headers,
176 // and Boost headers. Improper case for these #includes is a
177 // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)178 static bool warnByDefaultOnWrongCase(StringRef Include) {
179   // If the first component of the path is "boost", treat this like a standard header
180   // for the purposes of diagnostics.
181   if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
182     return true;
183 
184   // "condition_variable" is the longest standard header name at 18 characters.
185   // If the include file name is longer than that, it can't be a standard header.
186   static const size_t MaxStdHeaderNameLen = 18u;
187   if (Include.size() > MaxStdHeaderNameLen)
188     return false;
189 
190   // Lowercase and normalize the search string.
191   SmallString<32> LowerInclude{Include};
192   for (char &Ch : LowerInclude) {
193     // In the ASCII range?
194     if (static_cast<unsigned char>(Ch) > 0x7f)
195       return false; // Can't be a standard header
196     // ASCII lowercase:
197     if (Ch >= 'A' && Ch <= 'Z')
198       Ch += 'a' - 'A';
199     // Normalize path separators for comparison purposes.
200     else if (::llvm::sys::path::is_separator(Ch))
201       Ch = '/';
202   }
203 
204   // The standard C/C++ and Posix headers
205   return llvm::StringSwitch<bool>(LowerInclude)
206     // C library headers
207     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
208     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
209     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
210     .Cases("stdatomic.h", "stdbool.h", "stddef.h", "stdint.h", "stdio.h", true)
211     .Cases("stdlib.h", "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", true)
212     .Cases("time.h", "uchar.h", "wchar.h", "wctype.h", true)
213 
214     // C++ headers for C library facilities
215     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
216     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
217     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
218     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
219     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
220     .Case("cwctype", true)
221 
222     // C++ library headers
223     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
224     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
225     .Cases("atomic", "future", "map", "set", "type_traits", true)
226     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
227     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
228     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
229     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
230     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
231     .Cases("deque", "istream", "queue", "string", "valarray", true)
232     .Cases("exception", "iterator", "random", "strstream", "vector", true)
233     .Cases("forward_list", "limits", "ratio", "system_error", true)
234 
235     // POSIX headers (which aren't also C headers)
236     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
237     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
238     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
239     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
240     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
241     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
242     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
243     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
244     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
245     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
246     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
247     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
248     .Default(false);
249 }
250 
251 /// Find a similar string in `Candidates`.
252 ///
253 /// \param LHS a string for a similar string in `Candidates`
254 ///
255 /// \param Candidates the candidates to find a similar string.
256 ///
257 /// \returns a similar string if exists. If no similar string exists,
258 /// returns std::nullopt.
259 static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)260 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
261   // We need to check if `Candidates` has the exact case-insensitive string
262   // because the Levenshtein distance match does not care about it.
263   for (StringRef C : Candidates) {
264     if (LHS.equals_insensitive(C)) {
265       return C;
266     }
267   }
268 
269   // Keep going with the Levenshtein distance match.
270   // If the LHS size is less than 3, use the LHS size minus 1 and if not,
271   // use the LHS size divided by 3.
272   size_t Length = LHS.size();
273   size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
274 
275   std::optional<std::pair<StringRef, size_t>> SimilarStr;
276   for (StringRef C : Candidates) {
277     size_t CurDist = LHS.edit_distance(C, true);
278     if (CurDist <= MaxDist) {
279       if (!SimilarStr) {
280         // The first similar string found.
281         SimilarStr = {C, CurDist};
282       } else if (CurDist < SimilarStr->second) {
283         // More similar string found.
284         SimilarStr = {C, CurDist};
285       }
286     }
287   }
288 
289   if (SimilarStr) {
290     return SimilarStr->first;
291   } else {
292     return std::nullopt;
293   }
294 }
295 
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)296 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
297                                   bool *ShadowFlag) {
298   // Missing macro name?
299   if (MacroNameTok.is(tok::eod))
300     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
301 
302   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
303   if (!II)
304     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
305 
306   if (II->isCPlusPlusOperatorKeyword()) {
307     // C++ 2.5p2: Alternative tokens behave the same as its primary token
308     // except for their spellings.
309     Diag(MacroNameTok, getLangOpts().MicrosoftExt
310                            ? diag::ext_pp_operator_used_as_macro_name
311                            : diag::err_pp_operator_used_as_macro_name)
312         << II << MacroNameTok.getKind();
313     // Allow #defining |and| and friends for Microsoft compatibility or
314     // recovery when legacy C headers are included in C++.
315   }
316 
317   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
318     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
319     return Diag(MacroNameTok, diag::err_defined_macro_name);
320   }
321 
322   if (isDefineUndef == MU_Undef) {
323     auto *MI = getMacroInfo(II);
324     if (MI && MI->isBuiltinMacro()) {
325       // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
326       // and C++ [cpp.predefined]p4], but allow it as an extension.
327       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
328     }
329   }
330 
331   // If defining/undefining reserved identifier or a keyword, we need to issue
332   // a warning.
333   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
334   if (ShadowFlag)
335     *ShadowFlag = false;
336   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
337       (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
338     MacroDiag D = MD_NoWarn;
339     if (isDefineUndef == MU_Define) {
340       D = shouldWarnOnMacroDef(*this, II);
341     }
342     else if (isDefineUndef == MU_Undef)
343       D = shouldWarnOnMacroUndef(*this, II);
344     if (D == MD_KeywordDef) {
345       // We do not want to warn on some patterns widely used in configuration
346       // scripts.  This requires analyzing next tokens, so do not issue warnings
347       // now, only inform caller.
348       if (ShadowFlag)
349         *ShadowFlag = true;
350     }
351     if (D == MD_ReservedMacro)
352       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
353   }
354 
355   // Okay, we got a good identifier.
356   return false;
357 }
358 
359 /// Lex and validate a macro name, which occurs after a
360 /// \#define or \#undef.
361 ///
362 /// This sets the token kind to eod and discards the rest of the macro line if
363 /// the macro name is invalid.
364 ///
365 /// \param MacroNameTok Token that is expected to be a macro name.
366 /// \param isDefineUndef Context in which macro is used.
367 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)368 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
369                                  bool *ShadowFlag) {
370   // Read the token, don't allow macro expansion on it.
371   LexUnexpandedToken(MacroNameTok);
372 
373   if (MacroNameTok.is(tok::code_completion)) {
374     if (CodeComplete)
375       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
376     setCodeCompletionReached();
377     LexUnexpandedToken(MacroNameTok);
378   }
379 
380   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
381     return;
382 
383   // Invalid macro name, read and discard the rest of the line and set the
384   // token kind to tok::eod if necessary.
385   if (MacroNameTok.isNot(tok::eod)) {
386     MacroNameTok.setKind(tok::eod);
387     DiscardUntilEndOfDirective();
388   }
389 }
390 
391 /// Ensure that the next token is a tok::eod token.
392 ///
393 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
394 /// true, then we consider macros that expand to zero tokens as being ok.
395 ///
396 /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)397 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
398                                                  bool EnableMacros) {
399   Token Tmp;
400   // Lex unexpanded tokens for most directives: macros might expand to zero
401   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
402   // #line) allow empty macros.
403   if (EnableMacros)
404     Lex(Tmp);
405   else
406     LexUnexpandedToken(Tmp);
407 
408   // There should be no tokens after the directive, but we allow them as an
409   // extension.
410   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
411     LexUnexpandedToken(Tmp);
412 
413   if (Tmp.is(tok::eod))
414     return Tmp.getLocation();
415 
416   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
417   // or if this is a macro-style preprocessing directive, because it is more
418   // trouble than it is worth to insert /**/ and check that there is no /**/
419   // in the range also.
420   FixItHint Hint;
421   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
422       !CurTokenLexer)
423     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
424   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
425   return DiscardUntilEndOfDirective().getEnd();
426 }
427 
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const428 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
429                                           StringRef Directive) const {
430   // If this is a `.S` file, treat unknown # directives as non-preprocessor
431   // directives.
432   if (getLangOpts().AsmPreprocessor) return;
433 
434   std::vector<StringRef> Candidates = {
435       "if", "ifdef", "ifndef", "elif", "else", "endif"
436   };
437   if (LangOpts.C2x || LangOpts.CPlusPlus2b)
438     Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
439 
440   if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
441     // Directive cannot be coming from macro.
442     assert(Tok.getLocation().isFileID());
443     CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
444         Tok.getLocation(),
445         Tok.getLocation().getLocWithOffset(Directive.size()));
446     StringRef SuggValue = *Sugg;
447 
448     auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
449     Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
450   }
451 }
452 
453 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
454 /// decided that the subsequent tokens are in the \#if'd out portion of the
455 /// file.  Lex the rest of the file, until we see an \#endif.  If
456 /// FoundNonSkipPortion is true, then we have already emitted code for part of
457 /// this \#if directive, so \#else/\#elif blocks should never be entered.
458 /// If ElseOk is true, then \#else directives are ok, if not, then we have
459 /// already seen one so a \#else directive is a duplicate.  When this returns,
460 /// the caller can lex the first valid token.
SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,SourceLocation IfTokenLoc,bool FoundNonSkipPortion,bool FoundElse,SourceLocation ElseLoc)461 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
462                                                 SourceLocation IfTokenLoc,
463                                                 bool FoundNonSkipPortion,
464                                                 bool FoundElse,
465                                                 SourceLocation ElseLoc) {
466   // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
467   // not getting called recursively by storing the RecordedSkippedRanges
468   // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
469   // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
470   // invalidated. If this changes and there is a need to call
471   // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
472   // change to do a second lookup in endLexPass function instead of reusing the
473   // lookup pointer.
474   assert(!SkippingExcludedConditionalBlock &&
475          "calling SkipExcludedConditionalBlock recursively");
476   llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
477 
478   ++NumSkipped;
479   assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");
480 
481   if (PreambleConditionalStack.reachedEOFWhileSkipping())
482     PreambleConditionalStack.clearSkipInfo();
483   else
484     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
485                                      FoundNonSkipPortion, FoundElse);
486 
487   // Enter raw mode to disable identifier lookup (and thus macro expansion),
488   // disabling warnings, etc.
489   CurPPLexer->LexingRawMode = true;
490   Token Tok;
491   SourceLocation endLoc;
492 
493   /// Keeps track and caches skipped ranges and also retrieves a prior skipped
494   /// range if the same block is re-visited.
495   struct SkippingRangeStateTy {
496     Preprocessor &PP;
497 
498     const char *BeginPtr = nullptr;
499     unsigned *SkipRangePtr = nullptr;
500 
501     SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
502 
503     void beginLexPass() {
504       if (BeginPtr)
505         return; // continue skipping a block.
506 
507       // Initiate a skipping block and adjust the lexer if we already skipped it
508       // before.
509       BeginPtr = PP.CurLexer->getBufferLocation();
510       SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
511       if (*SkipRangePtr) {
512         PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
513                           /*IsAtStartOfLine*/ true);
514       }
515     }
516 
517     void endLexPass(const char *Hashptr) {
518       if (!BeginPtr) {
519         // Not doing normal lexing.
520         assert(PP.CurLexer->isDependencyDirectivesLexer());
521         return;
522       }
523 
524       // Finished skipping a block, record the range if it's first time visited.
525       if (!*SkipRangePtr) {
526         *SkipRangePtr = Hashptr - BeginPtr;
527       }
528       assert(*SkipRangePtr == Hashptr - BeginPtr);
529       BeginPtr = nullptr;
530       SkipRangePtr = nullptr;
531     }
532   } SkippingRangeState(*this);
533 
534   while (true) {
535     if (CurLexer->isDependencyDirectivesLexer()) {
536       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
537     } else {
538       SkippingRangeState.beginLexPass();
539       while (true) {
540         CurLexer->Lex(Tok);
541 
542         if (Tok.is(tok::code_completion)) {
543           setCodeCompletionReached();
544           if (CodeComplete)
545             CodeComplete->CodeCompleteInConditionalExclusion();
546           continue;
547         }
548 
549         // If this is the end of the buffer, we have an error.
550         if (Tok.is(tok::eof)) {
551           // We don't emit errors for unterminated conditionals here,
552           // Lexer::LexEndOfFile can do that properly.
553           // Just return and let the caller lex after this #include.
554           if (PreambleConditionalStack.isRecording())
555             PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
556                                                       FoundNonSkipPortion,
557                                                       FoundElse, ElseLoc);
558           break;
559         }
560 
561         // If this token is not a preprocessor directive, just skip it.
562         if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
563           continue;
564 
565         break;
566       }
567     }
568     if (Tok.is(tok::eof))
569       break;
570 
571     // We just parsed a # character at the start of a line, so we're in
572     // directive mode.  Tell the lexer this so any newlines we see will be
573     // converted into an EOD token (this terminates the macro).
574     CurPPLexer->ParsingPreprocessorDirective = true;
575     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
576 
577     assert(Tok.is(tok::hash));
578     const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
579     assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
580 
581     // Read the next token, the directive flavor.
582     LexUnexpandedToken(Tok);
583 
584     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
585     // something bogus), skip it.
586     if (Tok.isNot(tok::raw_identifier)) {
587       CurPPLexer->ParsingPreprocessorDirective = false;
588       // Restore comment saving mode.
589       if (CurLexer) CurLexer->resetExtendedTokenMode();
590       continue;
591     }
592 
593     // If the first letter isn't i or e, it isn't interesting to us.  We know that
594     // this is safe in the face of spelling differences, because there is no way
595     // to spell an i/e in a strange way that is another letter.  Skipping this
596     // allows us to avoid looking up the identifier info for #define/#undef and
597     // other common directives.
598     StringRef RI = Tok.getRawIdentifier();
599 
600     char FirstChar = RI[0];
601     if (FirstChar >= 'a' && FirstChar <= 'z' &&
602         FirstChar != 'i' && FirstChar != 'e') {
603       CurPPLexer->ParsingPreprocessorDirective = false;
604       // Restore comment saving mode.
605       if (CurLexer) CurLexer->resetExtendedTokenMode();
606       continue;
607     }
608 
609     // Get the identifier name without trigraphs or embedded newlines.  Note
610     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
611     // when skipping.
612     char DirectiveBuf[20];
613     StringRef Directive;
614     if (!Tok.needsCleaning() && RI.size() < 20) {
615       Directive = RI;
616     } else {
617       std::string DirectiveStr = getSpelling(Tok);
618       size_t IdLen = DirectiveStr.size();
619       if (IdLen >= 20) {
620         CurPPLexer->ParsingPreprocessorDirective = false;
621         // Restore comment saving mode.
622         if (CurLexer) CurLexer->resetExtendedTokenMode();
623         continue;
624       }
625       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
626       Directive = StringRef(DirectiveBuf, IdLen);
627     }
628 
629     if (Directive.startswith("if")) {
630       StringRef Sub = Directive.substr(2);
631       if (Sub.empty() ||   // "if"
632           Sub == "def" ||   // "ifdef"
633           Sub == "ndef") {  // "ifndef"
634         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
635         // bother parsing the condition.
636         DiscardUntilEndOfDirective();
637         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
638                                        /*foundnonskip*/false,
639                                        /*foundelse*/false);
640       } else {
641         SuggestTypoedDirective(Tok, Directive);
642       }
643     } else if (Directive[0] == 'e') {
644       StringRef Sub = Directive.substr(1);
645       if (Sub == "ndif") {  // "endif"
646         PPConditionalInfo CondInfo;
647         CondInfo.WasSkipping = true; // Silence bogus warning.
648         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
649         (void)InCond;  // Silence warning in no-asserts mode.
650         assert(!InCond && "Can't be skipping if not in a conditional!");
651 
652         // If we popped the outermost skipping block, we're done skipping!
653         if (!CondInfo.WasSkipping) {
654           SkippingRangeState.endLexPass(Hashptr);
655           // Restore the value of LexingRawMode so that trailing comments
656           // are handled correctly, if we've reached the outermost block.
657           CurPPLexer->LexingRawMode = false;
658           endLoc = CheckEndOfDirective("endif");
659           CurPPLexer->LexingRawMode = true;
660           if (Callbacks)
661             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
662           break;
663         } else {
664           DiscardUntilEndOfDirective();
665         }
666       } else if (Sub == "lse") { // "else".
667         // #else directive in a skipping conditional.  If not in some other
668         // skipping conditional, and if #else hasn't already been seen, enter it
669         // as a non-skipping conditional.
670         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
671 
672         if (!CondInfo.WasSkipping)
673           SkippingRangeState.endLexPass(Hashptr);
674 
675         // If this is a #else with a #else before it, report the error.
676         if (CondInfo.FoundElse)
677           Diag(Tok, diag::pp_err_else_after_else);
678 
679         // Note that we've seen a #else in this conditional.
680         CondInfo.FoundElse = true;
681 
682         // If the conditional is at the top level, and the #if block wasn't
683         // entered, enter the #else block now.
684         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
685           CondInfo.FoundNonSkip = true;
686           // Restore the value of LexingRawMode so that trailing comments
687           // are handled correctly.
688           CurPPLexer->LexingRawMode = false;
689           endLoc = CheckEndOfDirective("else");
690           CurPPLexer->LexingRawMode = true;
691           if (Callbacks)
692             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
693           break;
694         } else {
695           DiscardUntilEndOfDirective();  // C99 6.10p4.
696         }
697       } else if (Sub == "lif") {  // "elif".
698         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
699 
700         if (!CondInfo.WasSkipping)
701           SkippingRangeState.endLexPass(Hashptr);
702 
703         // If this is a #elif with a #else before it, report the error.
704         if (CondInfo.FoundElse)
705           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
706 
707         // If this is in a skipping block or if we're already handled this #if
708         // block, don't bother parsing the condition.
709         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
710           // FIXME: We should probably do at least some minimal parsing of the
711           // condition to verify that it is well-formed. The current state
712           // allows #elif* directives with completely malformed (or missing)
713           // conditions.
714           DiscardUntilEndOfDirective();
715         } else {
716           // Restore the value of LexingRawMode so that identifiers are
717           // looked up, etc, inside the #elif expression.
718           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
719           CurPPLexer->LexingRawMode = false;
720           IdentifierInfo *IfNDefMacro = nullptr;
721           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
722           // Stop if Lexer became invalid after hitting code completion token.
723           if (!CurPPLexer)
724             return;
725           const bool CondValue = DER.Conditional;
726           CurPPLexer->LexingRawMode = true;
727           if (Callbacks) {
728             Callbacks->Elif(
729                 Tok.getLocation(), DER.ExprRange,
730                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
731                 CondInfo.IfLoc);
732           }
733           // If this condition is true, enter it!
734           if (CondValue) {
735             CondInfo.FoundNonSkip = true;
736             break;
737           }
738         }
739       } else if (Sub == "lifdef" ||  // "elifdef"
740                  Sub == "lifndef") { // "elifndef"
741         bool IsElifDef = Sub == "lifdef";
742         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
743         Token DirectiveToken = Tok;
744 
745         if (!CondInfo.WasSkipping)
746           SkippingRangeState.endLexPass(Hashptr);
747 
748         // Warn if using `#elifdef` & `#elifndef` in not C2x & C++2b mode even
749         // if this branch is in a skipping block.
750         unsigned DiagID;
751         if (LangOpts.CPlusPlus)
752           DiagID = LangOpts.CPlusPlus2b ? diag::warn_cxx2b_compat_pp_directive
753                                         : diag::ext_cxx2b_pp_directive;
754         else
755           DiagID = LangOpts.C2x ? diag::warn_c2x_compat_pp_directive
756                                 : diag::ext_c2x_pp_directive;
757         Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
758 
759         // If this is a #elif with a #else before it, report the error.
760         if (CondInfo.FoundElse)
761           Diag(Tok, diag::pp_err_elif_after_else)
762               << (IsElifDef ? PED_Elifdef : PED_Elifndef);
763 
764         // If this is in a skipping block or if we're already handled this #if
765         // block, don't bother parsing the condition.
766         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
767           // FIXME: We should probably do at least some minimal parsing of the
768           // condition to verify that it is well-formed. The current state
769           // allows #elif* directives with completely malformed (or missing)
770           // conditions.
771           DiscardUntilEndOfDirective();
772         } else {
773           // Restore the value of LexingRawMode so that identifiers are
774           // looked up, etc, inside the #elif[n]def expression.
775           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
776           CurPPLexer->LexingRawMode = false;
777           Token MacroNameTok;
778           ReadMacroName(MacroNameTok);
779           CurPPLexer->LexingRawMode = true;
780 
781           // If the macro name token is tok::eod, there was an error that was
782           // already reported.
783           if (MacroNameTok.is(tok::eod)) {
784             // Skip code until we get to #endif.  This helps with recovery by
785             // not emitting an error when the #endif is reached.
786             continue;
787           }
788 
789           emitMacroExpansionWarnings(MacroNameTok);
790 
791           CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
792 
793           IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
794           auto MD = getMacroDefinition(MII);
795           MacroInfo *MI = MD.getMacroInfo();
796 
797           if (Callbacks) {
798             if (IsElifDef) {
799               Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
800                                  MD);
801             } else {
802               Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
803                                   MD);
804             }
805           }
806           // If this condition is true, enter it!
807           if (static_cast<bool>(MI) == IsElifDef) {
808             CondInfo.FoundNonSkip = true;
809             break;
810           }
811         }
812       } else {
813         SuggestTypoedDirective(Tok, Directive);
814       }
815     } else {
816       SuggestTypoedDirective(Tok, Directive);
817     }
818 
819     CurPPLexer->ParsingPreprocessorDirective = false;
820     // Restore comment saving mode.
821     if (CurLexer) CurLexer->resetExtendedTokenMode();
822   }
823 
824   // Finally, if we are out of the conditional (saw an #endif or ran off the end
825   // of the file, just stop skipping and return to lexing whatever came after
826   // the #if block.
827   CurPPLexer->LexingRawMode = false;
828 
829   // The last skipped range isn't actually skipped yet if it's truncated
830   // by the end of the preamble; we'll resume parsing after the preamble.
831   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
832     Callbacks->SourceRangeSkipped(
833         SourceRange(HashTokenLoc, endLoc.isValid()
834                                       ? endLoc
835                                       : CurPPLexer->getSourceLocation()),
836         Tok.getLocation());
837 }
838 
getModuleForLocation(SourceLocation Loc,bool AllowTextual)839 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
840                                            bool AllowTextual) {
841   if (!SourceMgr.isInMainFile(Loc)) {
842     // Try to determine the module of the include directive.
843     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
844     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
845     if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
846       // The include comes from an included file.
847       return HeaderInfo.getModuleMap()
848           .findModuleForHeader(EntryOfIncl, AllowTextual)
849           .getModule();
850     }
851   }
852 
853   // This is either in the main file or not in a file at all. It belongs
854   // to the current module, if there is one.
855   return getLangOpts().CurrentModule.empty()
856              ? nullptr
857              : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
858 }
859 
860 const FileEntry *
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)861 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
862                                                SourceLocation Loc) {
863   Module *IncM = getModuleForLocation(
864       IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
865 
866   // Walk up through the include stack, looking through textual headers of M
867   // until we hit a non-textual header that we can #include. (We assume textual
868   // headers of a module with non-textual headers aren't meant to be used to
869   // import entities from the module.)
870   auto &SM = getSourceManager();
871   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
872     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
873     auto *FE = SM.getFileEntryForID(ID);
874     if (!FE)
875       break;
876 
877     // We want to find all possible modules that might contain this header, so
878     // search all enclosing directories for module maps and load them.
879     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
880                             SourceMgr.isInSystemHeader(Loc));
881 
882     bool InPrivateHeader = false;
883     for (auto Header : HeaderInfo.findAllModulesForHeader(FE)) {
884       if (!Header.isAccessibleFrom(IncM)) {
885         // It's in a private header; we can't #include it.
886         // FIXME: If there's a public header in some module that re-exports it,
887         // then we could suggest including that, but it's not clear that's the
888         // expected way to make this entity visible.
889         InPrivateHeader = true;
890         continue;
891       }
892 
893       // Don't suggest explicitly excluded headers.
894       if (Header.getRole() == ModuleMap::ExcludedHeader)
895         continue;
896 
897       // We'll suggest including textual headers below if they're
898       // include-guarded.
899       if (Header.getRole() & ModuleMap::TextualHeader)
900         continue;
901 
902       // If we have a module import syntax, we shouldn't include a header to
903       // make a particular module visible. Let the caller know they should
904       // suggest an import instead.
905       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules ||
906           getLangOpts().ModulesTS)
907         return nullptr;
908 
909       // If this is an accessible, non-textual header of M's top-level module
910       // that transitively includes the given location and makes the
911       // corresponding module visible, this is the thing to #include.
912       return FE;
913     }
914 
915     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
916     // an import either.
917     if (InPrivateHeader)
918       return nullptr;
919 
920     // If the header is includable and has an include guard, assume the
921     // intended way to expose its contents is by #include, not by importing a
922     // module that transitively includes it.
923     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
924       return FE;
925 
926     Loc = SM.getIncludeLoc(ID);
927   }
928 
929   return nullptr;
930 }
931 
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)932 OptionalFileEntryRef Preprocessor::LookupFile(
933     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
934     ConstSearchDirIterator FromDir, const FileEntry *FromFile,
935     ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
936     SmallVectorImpl<char> *RelativePath,
937     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
938     bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
939   ConstSearchDirIterator CurDirLocal = nullptr;
940   ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
941 
942   Module *RequestingModule = getModuleForLocation(
943       FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
944   bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);
945 
946   // If the header lookup mechanism may be relative to the current inclusion
947   // stack, record the parent #includes.
948   SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
949       Includers;
950   bool BuildSystemModule = false;
951   if (!FromDir && !FromFile) {
952     FileID FID = getCurrentFileLexer()->getFileID();
953     const FileEntry *FileEnt = SourceMgr.getFileEntryForID(FID);
954 
955     // If there is no file entry associated with this file, it must be the
956     // predefines buffer or the module includes buffer. Any other file is not
957     // lexed with a normal lexer, so it won't be scanned for preprocessor
958     // directives.
959     //
960     // If we have the predefines buffer, resolve #include references (which come
961     // from the -include command line argument) from the current working
962     // directory instead of relative to the main file.
963     //
964     // If we have the module includes buffer, resolve #include references (which
965     // come from header declarations in the module map) relative to the module
966     // map file.
967     if (!FileEnt) {
968       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
969         Includers.push_back(std::make_pair(nullptr, MainFileDir));
970         BuildSystemModule = getCurrentModule()->IsSystem;
971       } else if ((FileEnt =
972                     SourceMgr.getFileEntryForID(SourceMgr.getMainFileID())))
973         Includers.push_back(std::make_pair(FileEnt, *FileMgr.getDirectory(".")));
974     } else {
975       Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
976     }
977 
978     // MSVC searches the current include stack from top to bottom for
979     // headers included by quoted include directives.
980     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
981     if (LangOpts.MSVCCompat && !isAngled) {
982       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
983         if (IsFileLexer(ISEntry))
984           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
985             Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
986       }
987     }
988   }
989 
990   CurDir = CurDirLookup;
991 
992   if (FromFile) {
993     // We're supposed to start looking from after a particular file. Search
994     // the include path until we find that file or run out of files.
995     ConstSearchDirIterator TmpCurDir = CurDir;
996     ConstSearchDirIterator TmpFromDir = nullptr;
997     while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
998                Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
999                Includers, SearchPath, RelativePath, RequestingModule,
1000                SuggestedModule, /*IsMapped=*/nullptr,
1001                /*IsFrameworkFound=*/nullptr, SkipCache)) {
1002       // Keep looking as if this file did a #include_next.
1003       TmpFromDir = TmpCurDir;
1004       ++TmpFromDir;
1005       if (&FE->getFileEntry() == FromFile) {
1006         // Found it.
1007         FromDir = TmpFromDir;
1008         CurDir = TmpCurDir;
1009         break;
1010       }
1011     }
1012   }
1013 
1014   // Do a standard file entry lookup.
1015   OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1016       Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1017       RelativePath, RequestingModule, SuggestedModule, IsMapped,
1018       IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1019   if (FE) {
1020     if (SuggestedModule && !LangOpts.AsmPreprocessor)
1021       HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
1022           RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
1023           Filename, *FE);
1024     return FE;
1025   }
1026 
1027   const FileEntry *CurFileEnt;
1028   // Otherwise, see if this is a subframework header.  If so, this is relative
1029   // to one of the headers on the #include stack.  Walk the list of the current
1030   // headers on the #include stack and pass them to HeaderInfo.
1031   if (IsFileLexer()) {
1032     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1033       if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1034               Filename, CurFileEnt, SearchPath, RelativePath, RequestingModule,
1035               SuggestedModule)) {
1036         if (SuggestedModule && !LangOpts.AsmPreprocessor)
1037           HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
1038               RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
1039               Filename, *FE);
1040         return FE;
1041       }
1042     }
1043   }
1044 
1045   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1046     if (IsFileLexer(ISEntry)) {
1047       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1048         if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1049                 Filename, CurFileEnt, SearchPath, RelativePath,
1050                 RequestingModule, SuggestedModule)) {
1051           if (SuggestedModule && !LangOpts.AsmPreprocessor)
1052             HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
1053                 RequestingModule, RequestingModuleIsModuleInterface,
1054                 FilenameLoc, Filename, *FE);
1055           return FE;
1056         }
1057       }
1058     }
1059   }
1060 
1061   // Otherwise, we really couldn't find the file.
1062   return std::nullopt;
1063 }
1064 
1065 //===----------------------------------------------------------------------===//
1066 // Preprocessor Directive Handling.
1067 //===----------------------------------------------------------------------===//
1068 
1069 class Preprocessor::ResetMacroExpansionHelper {
1070 public:
ResetMacroExpansionHelper(Preprocessor * pp)1071   ResetMacroExpansionHelper(Preprocessor *pp)
1072     : PP(pp), save(pp->DisableMacroExpansion) {
1073     if (pp->MacroExpansionInDirectivesOverride)
1074       pp->DisableMacroExpansion = false;
1075   }
1076 
~ResetMacroExpansionHelper()1077   ~ResetMacroExpansionHelper() {
1078     PP->DisableMacroExpansion = save;
1079   }
1080 
1081 private:
1082   Preprocessor *PP;
1083   bool save;
1084 };
1085 
1086 /// Process a directive while looking for the through header or a #pragma
1087 /// hdrstop. The following directives are handled:
1088 /// #include (to check if it is the through header)
1089 /// #define (to warn about macros that don't match the PCH)
1090 /// #pragma (to check for pragma hdrstop).
1091 /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1092 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1093                                                        SourceLocation HashLoc) {
1094   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1095     if (II->getPPKeywordID() == tok::pp_define) {
1096       return HandleDefineDirective(Result,
1097                                    /*ImmediatelyAfterHeaderGuard=*/false);
1098     }
1099     if (SkippingUntilPCHThroughHeader &&
1100         II->getPPKeywordID() == tok::pp_include) {
1101       return HandleIncludeDirective(HashLoc, Result);
1102     }
1103     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1104       Lex(Result);
1105       auto *II = Result.getIdentifierInfo();
1106       if (II && II->getName() == "hdrstop")
1107         return HandlePragmaHdrstop(Result);
1108     }
1109   }
1110   DiscardUntilEndOfDirective();
1111 }
1112 
1113 /// HandleDirective - This callback is invoked when the lexer sees a # token
1114 /// at the start of a line.  This consumes the directive, modifies the
1115 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1116 /// read is the correct one.
HandleDirective(Token & Result)1117 void Preprocessor::HandleDirective(Token &Result) {
1118   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1119 
1120   // We just parsed a # character at the start of a line, so we're in directive
1121   // mode.  Tell the lexer this so any newlines we see will be converted into an
1122   // EOD token (which terminates the directive).
1123   CurPPLexer->ParsingPreprocessorDirective = true;
1124   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1125 
1126   bool ImmediatelyAfterTopLevelIfndef =
1127       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1128   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1129 
1130   ++NumDirectives;
1131 
1132   // We are about to read a token.  For the multiple-include optimization FA to
1133   // work, we have to remember if we had read any tokens *before* this
1134   // pp-directive.
1135   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1136 
1137   // Save the '#' token in case we need to return it later.
1138   Token SavedHash = Result;
1139 
1140   // Read the next token, the directive flavor.  This isn't expanded due to
1141   // C99 6.10.3p8.
1142   LexUnexpandedToken(Result);
1143 
1144   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1145   //   #define A(x) #x
1146   //   A(abc
1147   //     #warning blah
1148   //   def)
1149   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1150   // not support this for #include-like directives, since that can result in
1151   // terrible diagnostics, and does not work in GCC.
1152   if (InMacroArgs) {
1153     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1154       switch (II->getPPKeywordID()) {
1155       case tok::pp_include:
1156       case tok::pp_import:
1157       case tok::pp_include_next:
1158       case tok::pp___include_macros:
1159       case tok::pp_pragma:
1160         Diag(Result, diag::err_embedded_directive) << II->getName();
1161         Diag(*ArgMacro, diag::note_macro_expansion_here)
1162             << ArgMacro->getIdentifierInfo();
1163         DiscardUntilEndOfDirective();
1164         return;
1165       default:
1166         break;
1167       }
1168     }
1169     Diag(Result, diag::ext_embedded_directive);
1170   }
1171 
1172   // Temporarily enable macro expansion if set so
1173   // and reset to previous state when returning from this function.
1174   ResetMacroExpansionHelper helper(this);
1175 
1176   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1177     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1178 
1179   switch (Result.getKind()) {
1180   case tok::eod:
1181     return;   // null directive.
1182   case tok::code_completion:
1183     setCodeCompletionReached();
1184     if (CodeComplete)
1185       CodeComplete->CodeCompleteDirective(
1186                                     CurPPLexer->getConditionalStackDepth() > 0);
1187     return;
1188   case tok::numeric_constant:  // # 7  GNU line marker directive.
1189     if (getLangOpts().AsmPreprocessor)
1190       break;  // # 4 is not a preprocessor directive in .S files.
1191     return HandleDigitDirective(Result);
1192   default:
1193     IdentifierInfo *II = Result.getIdentifierInfo();
1194     if (!II) break; // Not an identifier.
1195 
1196     // Ask what the preprocessor keyword ID is.
1197     switch (II->getPPKeywordID()) {
1198     default: break;
1199     // C99 6.10.1 - Conditional Inclusion.
1200     case tok::pp_if:
1201       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1202     case tok::pp_ifdef:
1203       return HandleIfdefDirective(Result, SavedHash, false,
1204                                   true /*not valid for miopt*/);
1205     case tok::pp_ifndef:
1206       return HandleIfdefDirective(Result, SavedHash, true,
1207                                   ReadAnyTokensBeforeDirective);
1208     case tok::pp_elif:
1209     case tok::pp_elifdef:
1210     case tok::pp_elifndef:
1211       return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1212 
1213     case tok::pp_else:
1214       return HandleElseDirective(Result, SavedHash);
1215     case tok::pp_endif:
1216       return HandleEndifDirective(Result);
1217 
1218     // C99 6.10.2 - Source File Inclusion.
1219     case tok::pp_include:
1220       // Handle #include.
1221       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1222     case tok::pp___include_macros:
1223       // Handle -imacros.
1224       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1225 
1226     // C99 6.10.3 - Macro Replacement.
1227     case tok::pp_define:
1228       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1229     case tok::pp_undef:
1230       return HandleUndefDirective();
1231 
1232     // C99 6.10.4 - Line Control.
1233     case tok::pp_line:
1234       return HandleLineDirective();
1235 
1236     // C99 6.10.5 - Error Directive.
1237     case tok::pp_error:
1238       return HandleUserDiagnosticDirective(Result, false);
1239 
1240     // C99 6.10.6 - Pragma Directive.
1241     case tok::pp_pragma:
1242       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1243 
1244     // GNU Extensions.
1245     case tok::pp_import:
1246       return HandleImportDirective(SavedHash.getLocation(), Result);
1247     case tok::pp_include_next:
1248       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1249 
1250     case tok::pp_warning:
1251       if (LangOpts.CPlusPlus)
1252         Diag(Result, LangOpts.CPlusPlus2b
1253                          ? diag::warn_cxx2b_compat_warning_directive
1254                          : diag::ext_pp_warning_directive)
1255             << /*C++2b*/ 1;
1256       else
1257         Diag(Result, LangOpts.C2x ? diag::warn_c2x_compat_warning_directive
1258                                   : diag::ext_pp_warning_directive)
1259             << /*C2x*/ 0;
1260 
1261       return HandleUserDiagnosticDirective(Result, true);
1262     case tok::pp_ident:
1263       return HandleIdentSCCSDirective(Result);
1264     case tok::pp_sccs:
1265       return HandleIdentSCCSDirective(Result);
1266     case tok::pp_assert:
1267       //isExtension = true;  // FIXME: implement #assert
1268       break;
1269     case tok::pp_unassert:
1270       //isExtension = true;  // FIXME: implement #unassert
1271       break;
1272 
1273     case tok::pp___public_macro:
1274       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1275         return HandleMacroPublicDirective(Result);
1276       break;
1277 
1278     case tok::pp___private_macro:
1279       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1280         return HandleMacroPrivateDirective();
1281       break;
1282     }
1283     break;
1284   }
1285 
1286   // If this is a .S file, treat unknown # directives as non-preprocessor
1287   // directives.  This is important because # may be a comment or introduce
1288   // various pseudo-ops.  Just return the # token and push back the following
1289   // token to be lexed next time.
1290   if (getLangOpts().AsmPreprocessor) {
1291     auto Toks = std::make_unique<Token[]>(2);
1292     // Return the # and the token after it.
1293     Toks[0] = SavedHash;
1294     Toks[1] = Result;
1295 
1296     // If the second token is a hashhash token, then we need to translate it to
1297     // unknown so the token lexer doesn't try to perform token pasting.
1298     if (Result.is(tok::hashhash))
1299       Toks[1].setKind(tok::unknown);
1300 
1301     // Enter this token stream so that we re-lex the tokens.  Make sure to
1302     // enable macro expansion, in case the token after the # is an identifier
1303     // that is expanded.
1304     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1305     return;
1306   }
1307 
1308   // If we reached here, the preprocessing token is not valid!
1309   // Start suggesting if a similar directive found.
1310   Diag(Result, diag::err_pp_invalid_directive) << 0;
1311 
1312   // Read the rest of the PP line.
1313   DiscardUntilEndOfDirective();
1314 
1315   // Okay, we're done parsing the directive.
1316 }
1317 
1318 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1319 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1320 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1321                          unsigned DiagID, Preprocessor &PP,
1322                          bool IsGNULineDirective=false) {
1323   if (DigitTok.isNot(tok::numeric_constant)) {
1324     PP.Diag(DigitTok, DiagID);
1325 
1326     if (DigitTok.isNot(tok::eod))
1327       PP.DiscardUntilEndOfDirective();
1328     return true;
1329   }
1330 
1331   SmallString<64> IntegerBuffer;
1332   IntegerBuffer.resize(DigitTok.getLength());
1333   const char *DigitTokBegin = &IntegerBuffer[0];
1334   bool Invalid = false;
1335   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1336   if (Invalid)
1337     return true;
1338 
1339   // Verify that we have a simple digit-sequence, and compute the value.  This
1340   // is always a simple digit string computed in decimal, so we do this manually
1341   // here.
1342   Val = 0;
1343   for (unsigned i = 0; i != ActualLength; ++i) {
1344     // C++1y [lex.fcon]p1:
1345     //   Optional separating single quotes in a digit-sequence are ignored
1346     if (DigitTokBegin[i] == '\'')
1347       continue;
1348 
1349     if (!isDigit(DigitTokBegin[i])) {
1350       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1351               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1352       PP.DiscardUntilEndOfDirective();
1353       return true;
1354     }
1355 
1356     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1357     if (NextVal < Val) { // overflow.
1358       PP.Diag(DigitTok, DiagID);
1359       PP.DiscardUntilEndOfDirective();
1360       return true;
1361     }
1362     Val = NextVal;
1363   }
1364 
1365   if (DigitTokBegin[0] == '0' && Val)
1366     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1367       << IsGNULineDirective;
1368 
1369   return false;
1370 }
1371 
1372 /// Handle a \#line directive: C99 6.10.4.
1373 ///
1374 /// The two acceptable forms are:
1375 /// \verbatim
1376 ///   # line digit-sequence
1377 ///   # line digit-sequence "s-char-sequence"
1378 /// \endverbatim
HandleLineDirective()1379 void Preprocessor::HandleLineDirective() {
1380   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1381   // expanded.
1382   Token DigitTok;
1383   Lex(DigitTok);
1384 
1385   // Validate the number and convert it to an unsigned.
1386   unsigned LineNo;
1387   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1388     return;
1389 
1390   if (LineNo == 0)
1391     Diag(DigitTok, diag::ext_pp_line_zero);
1392 
1393   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1394   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1395   unsigned LineLimit = 32768U;
1396   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1397     LineLimit = 2147483648U;
1398   if (LineNo >= LineLimit)
1399     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1400   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1401     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1402 
1403   int FilenameID = -1;
1404   Token StrTok;
1405   Lex(StrTok);
1406 
1407   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1408   // string followed by eod.
1409   if (StrTok.is(tok::eod))
1410     ; // ok
1411   else if (StrTok.isNot(tok::string_literal)) {
1412     Diag(StrTok, diag::err_pp_line_invalid_filename);
1413     DiscardUntilEndOfDirective();
1414     return;
1415   } else if (StrTok.hasUDSuffix()) {
1416     Diag(StrTok, diag::err_invalid_string_udl);
1417     DiscardUntilEndOfDirective();
1418     return;
1419   } else {
1420     // Parse and validate the string, converting it into a unique ID.
1421     StringLiteralParser Literal(StrTok, *this);
1422     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1423     if (Literal.hadError) {
1424       DiscardUntilEndOfDirective();
1425       return;
1426     }
1427     if (Literal.Pascal) {
1428       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1429       DiscardUntilEndOfDirective();
1430       return;
1431     }
1432     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1433 
1434     // Verify that there is nothing after the string, other than EOD.  Because
1435     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1436     CheckEndOfDirective("line", true);
1437   }
1438 
1439   // Take the file kind of the file containing the #line directive. #line
1440   // directives are often used for generated sources from the same codebase, so
1441   // the new file should generally be classified the same way as the current
1442   // file. This is visible in GCC's pre-processed output, which rewrites #line
1443   // to GNU line markers.
1444   SrcMgr::CharacteristicKind FileKind =
1445       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1446 
1447   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1448                         false, FileKind);
1449 
1450   if (Callbacks)
1451     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1452                            PPCallbacks::RenameFile, FileKind);
1453 }
1454 
1455 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1456 /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1457 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1458                                 SrcMgr::CharacteristicKind &FileKind,
1459                                 Preprocessor &PP) {
1460   unsigned FlagVal;
1461   Token FlagTok;
1462   PP.Lex(FlagTok);
1463   if (FlagTok.is(tok::eod)) return false;
1464   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1465     return true;
1466 
1467   if (FlagVal == 1) {
1468     IsFileEntry = true;
1469 
1470     PP.Lex(FlagTok);
1471     if (FlagTok.is(tok::eod)) return false;
1472     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1473       return true;
1474   } else if (FlagVal == 2) {
1475     IsFileExit = true;
1476 
1477     SourceManager &SM = PP.getSourceManager();
1478     // If we are leaving the current presumed file, check to make sure the
1479     // presumed include stack isn't empty!
1480     FileID CurFileID =
1481       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1482     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1483     if (PLoc.isInvalid())
1484       return true;
1485 
1486     // If there is no include loc (main file) or if the include loc is in a
1487     // different physical file, then we aren't in a "1" line marker flag region.
1488     SourceLocation IncLoc = PLoc.getIncludeLoc();
1489     if (IncLoc.isInvalid() ||
1490         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1491       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1492       PP.DiscardUntilEndOfDirective();
1493       return true;
1494     }
1495 
1496     PP.Lex(FlagTok);
1497     if (FlagTok.is(tok::eod)) return false;
1498     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1499       return true;
1500   }
1501 
1502   // We must have 3 if there are still flags.
1503   if (FlagVal != 3) {
1504     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1505     PP.DiscardUntilEndOfDirective();
1506     return true;
1507   }
1508 
1509   FileKind = SrcMgr::C_System;
1510 
1511   PP.Lex(FlagTok);
1512   if (FlagTok.is(tok::eod)) return false;
1513   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1514     return true;
1515 
1516   // We must have 4 if there is yet another flag.
1517   if (FlagVal != 4) {
1518     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1519     PP.DiscardUntilEndOfDirective();
1520     return true;
1521   }
1522 
1523   FileKind = SrcMgr::C_ExternCSystem;
1524 
1525   PP.Lex(FlagTok);
1526   if (FlagTok.is(tok::eod)) return false;
1527 
1528   // There are no more valid flags here.
1529   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1530   PP.DiscardUntilEndOfDirective();
1531   return true;
1532 }
1533 
1534 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1535 /// one of the following forms:
1536 ///
1537 ///     # 42
1538 ///     # 42 "file" ('1' | '2')?
1539 ///     # 42 "file" ('1' | '2')? '3' '4'?
1540 ///
HandleDigitDirective(Token & DigitTok)1541 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1542   // Validate the number and convert it to an unsigned.  GNU does not have a
1543   // line # limit other than it fit in 32-bits.
1544   unsigned LineNo;
1545   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1546                    *this, true))
1547     return;
1548 
1549   Token StrTok;
1550   Lex(StrTok);
1551 
1552   bool IsFileEntry = false, IsFileExit = false;
1553   int FilenameID = -1;
1554   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1555 
1556   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1557   // string followed by eod.
1558   if (StrTok.is(tok::eod)) {
1559     Diag(StrTok, diag::ext_pp_gnu_line_directive);
1560     // Treat this like "#line NN", which doesn't change file characteristics.
1561     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1562   } else if (StrTok.isNot(tok::string_literal)) {
1563     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1564     DiscardUntilEndOfDirective();
1565     return;
1566   } else if (StrTok.hasUDSuffix()) {
1567     Diag(StrTok, diag::err_invalid_string_udl);
1568     DiscardUntilEndOfDirective();
1569     return;
1570   } else {
1571     // Parse and validate the string, converting it into a unique ID.
1572     StringLiteralParser Literal(StrTok, *this);
1573     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1574     if (Literal.hadError) {
1575       DiscardUntilEndOfDirective();
1576       return;
1577     }
1578     if (Literal.Pascal) {
1579       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1580       DiscardUntilEndOfDirective();
1581       return;
1582     }
1583 
1584     // If a filename was present, read any flags that are present.
1585     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1586       return;
1587     if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1588         !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1589       Diag(StrTok, diag::ext_pp_gnu_line_directive);
1590 
1591     // Exiting to an empty string means pop to the including file, so leave
1592     // FilenameID as -1 in that case.
1593     if (!(IsFileExit && Literal.GetString().empty()))
1594       FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1595   }
1596 
1597   // Create a line note with this information.
1598   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1599                         IsFileExit, FileKind);
1600 
1601   // If the preprocessor has callbacks installed, notify them of the #line
1602   // change.  This is used so that the line marker comes out in -E mode for
1603   // example.
1604   if (Callbacks) {
1605     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1606     if (IsFileEntry)
1607       Reason = PPCallbacks::EnterFile;
1608     else if (IsFileExit)
1609       Reason = PPCallbacks::ExitFile;
1610 
1611     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1612   }
1613 }
1614 
1615 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1616 ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1617 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1618                                                  bool isWarning) {
1619   // Read the rest of the line raw.  We do this because we don't want macros
1620   // to be expanded and we don't require that the tokens be valid preprocessing
1621   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1622   // collapse multiple consecutive white space between tokens, but this isn't
1623   // specified by the standard.
1624   SmallString<128> Message;
1625   CurLexer->ReadToEndOfLine(&Message);
1626 
1627   // Find the first non-whitespace character, so that we can make the
1628   // diagnostic more succinct.
1629   StringRef Msg = Message.str().ltrim(' ');
1630 
1631   if (isWarning)
1632     Diag(Tok, diag::pp_hash_warning) << Msg;
1633   else
1634     Diag(Tok, diag::err_pp_hash_error) << Msg;
1635 }
1636 
1637 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1638 ///
HandleIdentSCCSDirective(Token & Tok)1639 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1640   // Yes, this directive is an extension.
1641   Diag(Tok, diag::ext_pp_ident_directive);
1642 
1643   // Read the string argument.
1644   Token StrTok;
1645   Lex(StrTok);
1646 
1647   // If the token kind isn't a string, it's a malformed directive.
1648   if (StrTok.isNot(tok::string_literal) &&
1649       StrTok.isNot(tok::wide_string_literal)) {
1650     Diag(StrTok, diag::err_pp_malformed_ident);
1651     if (StrTok.isNot(tok::eod))
1652       DiscardUntilEndOfDirective();
1653     return;
1654   }
1655 
1656   if (StrTok.hasUDSuffix()) {
1657     Diag(StrTok, diag::err_invalid_string_udl);
1658     DiscardUntilEndOfDirective();
1659     return;
1660   }
1661 
1662   // Verify that there is nothing after the string, other than EOD.
1663   CheckEndOfDirective("ident");
1664 
1665   if (Callbacks) {
1666     bool Invalid = false;
1667     std::string Str = getSpelling(StrTok, &Invalid);
1668     if (!Invalid)
1669       Callbacks->Ident(Tok.getLocation(), Str);
1670   }
1671 }
1672 
1673 /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1674 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1675   Token MacroNameTok;
1676   ReadMacroName(MacroNameTok, MU_Undef);
1677 
1678   // Error reading macro name?  If so, diagnostic already issued.
1679   if (MacroNameTok.is(tok::eod))
1680     return;
1681 
1682   // Check to see if this is the last token on the #__public_macro line.
1683   CheckEndOfDirective("__public_macro");
1684 
1685   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1686   // Okay, we finally have a valid identifier to undef.
1687   MacroDirective *MD = getLocalMacroDirective(II);
1688 
1689   // If the macro is not defined, this is an error.
1690   if (!MD) {
1691     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1692     return;
1693   }
1694 
1695   // Note that this macro has now been exported.
1696   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1697                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1698 }
1699 
1700 /// Handle a #private directive.
HandleMacroPrivateDirective()1701 void Preprocessor::HandleMacroPrivateDirective() {
1702   Token MacroNameTok;
1703   ReadMacroName(MacroNameTok, MU_Undef);
1704 
1705   // Error reading macro name?  If so, diagnostic already issued.
1706   if (MacroNameTok.is(tok::eod))
1707     return;
1708 
1709   // Check to see if this is the last token on the #__private_macro line.
1710   CheckEndOfDirective("__private_macro");
1711 
1712   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1713   // Okay, we finally have a valid identifier to undef.
1714   MacroDirective *MD = getLocalMacroDirective(II);
1715 
1716   // If the macro is not defined, this is an error.
1717   if (!MD) {
1718     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1719     return;
1720   }
1721 
1722   // Note that this macro has now been marked private.
1723   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1724                                MacroNameTok.getLocation(), /*isPublic=*/false));
1725 }
1726 
1727 //===----------------------------------------------------------------------===//
1728 // Preprocessor Include Directive Handling.
1729 //===----------------------------------------------------------------------===//
1730 
1731 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1732 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1733 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1734 /// caller is expected to provide a buffer that is large enough to hold the
1735 /// spelling of the filename, but is also expected to handle the case when
1736 /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1737 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1738                                               StringRef &Buffer) {
1739   // Get the text form of the filename.
1740   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1741 
1742   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1743   // C++20 [lex.header]/2:
1744   //
1745   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1746   //   in C: behavior is undefined
1747   //   in C++: program is conditionally-supported with implementation-defined
1748   //           semantics
1749 
1750   // Make sure the filename is <x> or "x".
1751   bool isAngled;
1752   if (Buffer[0] == '<') {
1753     if (Buffer.back() != '>') {
1754       Diag(Loc, diag::err_pp_expects_filename);
1755       Buffer = StringRef();
1756       return true;
1757     }
1758     isAngled = true;
1759   } else if (Buffer[0] == '"') {
1760     if (Buffer.back() != '"') {
1761       Diag(Loc, diag::err_pp_expects_filename);
1762       Buffer = StringRef();
1763       return true;
1764     }
1765     isAngled = false;
1766   } else {
1767     Diag(Loc, diag::err_pp_expects_filename);
1768     Buffer = StringRef();
1769     return true;
1770   }
1771 
1772   // Diagnose #include "" as invalid.
1773   if (Buffer.size() <= 2) {
1774     Diag(Loc, diag::err_pp_empty_filename);
1775     Buffer = StringRef();
1776     return true;
1777   }
1778 
1779   // Skip the brackets.
1780   Buffer = Buffer.substr(1, Buffer.size()-2);
1781   return isAngled;
1782 }
1783 
1784 /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1785 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1786                                         tok::TokenKind Kind,
1787                                         void *AnnotationVal) {
1788   // FIXME: Produce this as the current token directly, rather than
1789   // allocating a new token for it.
1790   auto Tok = std::make_unique<Token[]>(1);
1791   Tok[0].startToken();
1792   Tok[0].setKind(Kind);
1793   Tok[0].setLocation(Range.getBegin());
1794   Tok[0].setAnnotationEndLoc(Range.getEnd());
1795   Tok[0].setAnnotationValue(AnnotationVal);
1796   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1797 }
1798 
1799 /// Produce a diagnostic informing the user that a #include or similar
1800 /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<std::pair<IdentifierInfo *,SourceLocation>> Path,SourceLocation PathEnd)1801 static void diagnoseAutoModuleImport(
1802     Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1803     ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1804     SourceLocation PathEnd) {
1805   SmallString<128> PathString;
1806   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1807     if (I)
1808       PathString += '.';
1809     PathString += Path[I].first->getName();
1810   }
1811 
1812   int IncludeKind = 0;
1813   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1814   case tok::pp_include:
1815     IncludeKind = 0;
1816     break;
1817 
1818   case tok::pp_import:
1819     IncludeKind = 1;
1820     break;
1821 
1822   case tok::pp_include_next:
1823     IncludeKind = 2;
1824     break;
1825 
1826   case tok::pp___include_macros:
1827     IncludeKind = 3;
1828     break;
1829 
1830   default:
1831     llvm_unreachable("unknown include directive kind");
1832   }
1833 
1834   PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1835       << IncludeKind << PathString;
1836 }
1837 
1838 // Given a vector of path components and a string containing the real
1839 // path to the file, build a properly-cased replacement in the vector,
1840 // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName)1841 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1842                             StringRef RealPathName) {
1843   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1844   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1845   int Cnt = 0;
1846   bool SuggestReplacement = false;
1847   // Below is a best-effort to handle ".." in paths. It is admittedly
1848   // not 100% correct in the presence of symlinks.
1849   for (auto &Component : llvm::reverse(Components)) {
1850     if ("." == Component) {
1851     } else if (".." == Component) {
1852       ++Cnt;
1853     } else if (Cnt) {
1854       --Cnt;
1855     } else if (RealPathComponentIter != RealPathComponentEnd) {
1856       if (Component != *RealPathComponentIter) {
1857         // If these path components differ by more than just case, then we
1858         // may be looking at symlinked paths. Bail on this diagnostic to avoid
1859         // noisy false positives.
1860         SuggestReplacement =
1861             RealPathComponentIter->equals_insensitive(Component);
1862         if (!SuggestReplacement)
1863           break;
1864         Component = *RealPathComponentIter;
1865       }
1866       ++RealPathComponentIter;
1867     }
1868   }
1869   return SuggestReplacement;
1870 }
1871 
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,DiagnosticsEngine & Diags,Module * M)1872 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1873                                           const TargetInfo &TargetInfo,
1874                                           DiagnosticsEngine &Diags, Module *M) {
1875   Module::Requirement Requirement;
1876   Module::UnresolvedHeaderDirective MissingHeader;
1877   Module *ShadowingModule = nullptr;
1878   if (M->isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1879                      ShadowingModule))
1880     return false;
1881 
1882   if (MissingHeader.FileNameLoc.isValid()) {
1883     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1884         << MissingHeader.IsUmbrella << MissingHeader.FileName;
1885   } else if (ShadowingModule) {
1886     Diags.Report(M->DefinitionLoc, diag::err_module_shadowed) << M->Name;
1887     Diags.Report(ShadowingModule->DefinitionLoc,
1888                  diag::note_previous_definition);
1889   } else {
1890     // FIXME: Track the location at which the requirement was specified, and
1891     // use it here.
1892     Diags.Report(M->DefinitionLoc, diag::err_module_unavailable)
1893         << M->getFullModuleName() << Requirement.second << Requirement.first;
1894   }
1895   return true;
1896 }
1897 
1898 std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const1899 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
1900   // #include_next is like #include, except that we start searching after
1901   // the current found directory.  If we can't do this, issue a
1902   // diagnostic.
1903   ConstSearchDirIterator Lookup = CurDirLookup;
1904   const FileEntry *LookupFromFile = nullptr;
1905 
1906   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
1907     // If the main file is a header, then it's either for PCH/AST generation,
1908     // or libclang opened it. Either way, handle it as a normal include below
1909     // and do not complain about include_next.
1910   } else if (isInPrimaryFile()) {
1911     Lookup = nullptr;
1912     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
1913   } else if (CurLexerSubmodule) {
1914     // Start looking up in the directory *after* the one in which the current
1915     // file would be found, if any.
1916     assert(CurPPLexer && "#include_next directive in macro?");
1917     LookupFromFile = CurPPLexer->getFileEntry();
1918     Lookup = nullptr;
1919   } else if (!Lookup) {
1920     // The current file was not found by walking the include path. Either it
1921     // is the primary file (handled above), or it was found by absolute path,
1922     // or it was found relative to such a file.
1923     // FIXME: Track enough information so we know which case we're in.
1924     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
1925   } else {
1926     // Start looking up in the next directory.
1927     ++Lookup;
1928   }
1929 
1930   return {Lookup, LookupFromFile};
1931 }
1932 
1933 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
1934 /// the file to be included from the lexer, then include it!  This is a common
1935 /// routine with functionality shared between \#include, \#include_next and
1936 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
1937 /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)1938 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
1939                                           Token &IncludeTok,
1940                                           ConstSearchDirIterator LookupFrom,
1941                                           const FileEntry *LookupFromFile) {
1942   Token FilenameTok;
1943   if (LexHeaderName(FilenameTok))
1944     return;
1945 
1946   if (FilenameTok.isNot(tok::header_name)) {
1947     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
1948     if (FilenameTok.isNot(tok::eod))
1949       DiscardUntilEndOfDirective();
1950     return;
1951   }
1952 
1953   // Verify that there is nothing after the filename, other than EOD.  Note
1954   // that we allow macros that expand to nothing after the filename, because
1955   // this falls into the category of "#include pp-tokens new-line" specified
1956   // in C99 6.10.2p4.
1957   SourceLocation EndLoc =
1958       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
1959 
1960   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
1961                                             EndLoc, LookupFrom, LookupFromFile);
1962   switch (Action.Kind) {
1963   case ImportAction::None:
1964   case ImportAction::SkippedModuleImport:
1965     break;
1966   case ImportAction::ModuleBegin:
1967     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1968                          tok::annot_module_begin, Action.ModuleForHeader);
1969     break;
1970   case ImportAction::HeaderUnitImport:
1971     EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
1972                          Action.ModuleForHeader);
1973     break;
1974   case ImportAction::ModuleImport:
1975     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1976                          tok::annot_module_include, Action.ModuleForHeader);
1977     break;
1978   case ImportAction::Failure:
1979     assert(TheModuleLoader.HadFatalFailure &&
1980            "This should be an early exit only to a fatal error");
1981     TheModuleLoader.HadFatalFailure = true;
1982     IncludeTok.setKind(tok::eof);
1983     CurLexer->cutOffLexing();
1984     return;
1985   }
1986 }
1987 
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)1988 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
1989     ConstSearchDirIterator *CurDir, StringRef &Filename,
1990     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
1991     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
1992     bool &IsMapped, ConstSearchDirIterator LookupFrom,
1993     const FileEntry *LookupFromFile, StringRef &LookupFilename,
1994     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
1995     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
1996   OptionalFileEntryRef File = LookupFile(
1997       FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
1998       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
1999       &SuggestedModule, &IsMapped, &IsFrameworkFound);
2000   if (File)
2001     return File;
2002 
2003   // Give the clients a chance to silently skip this include.
2004   if (Callbacks && Callbacks->FileNotFound(Filename))
2005     return std::nullopt;
2006 
2007   if (SuppressIncludeNotFoundError)
2008     return std::nullopt;
2009 
2010   // If the file could not be located and it was included via angle
2011   // brackets, we can attempt a lookup as though it were a quoted path to
2012   // provide the user with a possible fixit.
2013   if (isAngled) {
2014     OptionalFileEntryRef File = LookupFile(
2015         FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2016         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2017         &SuggestedModule, &IsMapped,
2018         /*IsFrameworkFound=*/nullptr);
2019     if (File) {
2020       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2021           << Filename << IsImportDecl
2022           << FixItHint::CreateReplacement(FilenameRange,
2023                                           "\"" + Filename.str() + "\"");
2024       return File;
2025     }
2026   }
2027 
2028   // Check for likely typos due to leading or trailing non-isAlphanumeric
2029   // characters
2030   StringRef OriginalFilename = Filename;
2031   if (LangOpts.SpellChecking) {
2032     // A heuristic to correct a typo file name by removing leading and
2033     // trailing non-isAlphanumeric characters.
2034     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2035       Filename = Filename.drop_until(isAlphanumeric);
2036       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2037         Filename = Filename.drop_back();
2038       }
2039       return Filename;
2040     };
2041     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2042     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2043 
2044     OptionalFileEntryRef File = LookupFile(
2045         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2046         LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2047         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2048         /*IsFrameworkFound=*/nullptr);
2049     if (File) {
2050       auto Hint =
2051           isAngled ? FixItHint::CreateReplacement(
2052                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
2053                    : FixItHint::CreateReplacement(
2054                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2055       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2056           << OriginalFilename << TypoCorrectionName << Hint;
2057       // We found the file, so set the Filename to the name after typo
2058       // correction.
2059       Filename = TypoCorrectionName;
2060       LookupFilename = TypoCorrectionLookupName;
2061       return File;
2062     }
2063   }
2064 
2065   // If the file is still not found, just go with the vanilla diagnostic
2066   assert(!File && "expected missing file");
2067   Diag(FilenameTok, diag::err_pp_file_not_found)
2068       << OriginalFilename << FilenameRange;
2069   if (IsFrameworkFound) {
2070     size_t SlashPos = OriginalFilename.find('/');
2071     assert(SlashPos != StringRef::npos &&
2072            "Include with framework name should have '/' in the filename");
2073     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2074     FrameworkCacheEntry &CacheEntry =
2075         HeaderInfo.LookupFrameworkCache(FrameworkName);
2076     assert(CacheEntry.Directory && "Found framework should be in cache");
2077     Diag(FilenameTok, diag::note_pp_framework_without_header)
2078         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2079         << CacheEntry.Directory->getName();
2080   }
2081 
2082   return std::nullopt;
2083 }
2084 
2085 /// Handle either a #include-like directive or an import declaration that names
2086 /// a header file.
2087 ///
2088 /// \param HashLoc The location of the '#' token for an include, or
2089 ///        SourceLocation() for an import declaration.
2090 /// \param IncludeTok The include / include_next / import token.
2091 /// \param FilenameTok The header-name token.
2092 /// \param EndLoc The location at which any imported macros become visible.
2093 /// \param LookupFrom For #include_next, the starting directory for the
2094 ///        directory lookup.
2095 /// \param LookupFromFile For #include_next, the starting file for the directory
2096 ///        lookup.
HandleHeaderIncludeOrImport(SourceLocation HashLoc,Token & IncludeTok,Token & FilenameTok,SourceLocation EndLoc,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2097 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2098     SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2099     SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2100     const FileEntry *LookupFromFile) {
2101   SmallString<128> FilenameBuffer;
2102   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2103   SourceLocation CharEnd = FilenameTok.getEndLoc();
2104 
2105   CharSourceRange FilenameRange
2106     = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
2107   StringRef OriginalFilename = Filename;
2108   bool isAngled =
2109     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2110 
2111   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2112   // error.
2113   if (Filename.empty())
2114     return {ImportAction::None};
2115 
2116   bool IsImportDecl = HashLoc.isInvalid();
2117   SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2118 
2119   // Complain about attempts to #include files in an audit pragma.
2120   if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2121     Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2122     Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2123 
2124     // Immediately leave the pragma.
2125     PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2126   }
2127 
2128   // Complain about attempts to #include files in an assume-nonnull pragma.
2129   if (PragmaAssumeNonNullLoc.isValid()) {
2130     Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2131     Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2132 
2133     // Immediately leave the pragma.
2134     PragmaAssumeNonNullLoc = SourceLocation();
2135   }
2136 
2137   if (HeaderInfo.HasIncludeAliasMap()) {
2138     // Map the filename with the brackets still attached.  If the name doesn't
2139     // map to anything, fall back on the filename we've already gotten the
2140     // spelling for.
2141     StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2142     if (!NewName.empty())
2143       Filename = NewName;
2144   }
2145 
2146   // Search include directories.
2147   bool IsMapped = false;
2148   bool IsFrameworkFound = false;
2149   ConstSearchDirIterator CurDir = nullptr;
2150   SmallString<1024> SearchPath;
2151   SmallString<1024> RelativePath;
2152   // We get the raw path only if we have 'Callbacks' to which we later pass
2153   // the path.
2154   ModuleMap::KnownHeader SuggestedModule;
2155   SourceLocation FilenameLoc = FilenameTok.getLocation();
2156   StringRef LookupFilename = Filename;
2157 
2158   // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2159   // is unnecessary on Windows since the filesystem there handles backslashes.
2160   SmallString<128> NormalizedPath;
2161   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2162   if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2163     NormalizedPath = Filename.str();
2164     llvm::sys::path::native(NormalizedPath);
2165     LookupFilename = NormalizedPath;
2166     BackslashStyle = llvm::sys::path::Style::windows;
2167   }
2168 
2169   OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2170       &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2171       IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2172       LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2173 
2174   if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2175     if (File && isPCHThroughHeader(&File->getFileEntry()))
2176       SkippingUntilPCHThroughHeader = false;
2177     return {ImportAction::None};
2178   }
2179 
2180   // Should we enter the source file? Set to Skip if either the source file is
2181   // known to have no effect beyond its effect on module visibility -- that is,
2182   // if it's got an include guard that is already defined, set to Import if it
2183   // is a modular header we've already built and should import.
2184 
2185   // For C++20 Modules
2186   // [cpp.include]/7 If the header identified by the header-name denotes an
2187   // importable header, it is implementation-defined whether the #include
2188   // preprocessing directive is instead replaced by an import directive.
2189   // For this implementation, the translation is permitted when we are parsing
2190   // the Global Module Fragment, and not otherwise (the cases where it would be
2191   // valid to replace an include with an import are highly constrained once in
2192   // named module purview; this choice avoids considerable complexity in
2193   // determining valid cases).
2194 
2195   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2196 
2197   if (PPOpts->SingleFileParseMode)
2198     Action = IncludeLimitReached;
2199 
2200   // If we've reached the max allowed include depth, it is usually due to an
2201   // include cycle. Don't enter already processed files again as it can lead to
2202   // reaching the max allowed include depth again.
2203   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2204       alreadyIncluded(*File))
2205     Action = IncludeLimitReached;
2206 
2207   // FIXME: We do not have a good way to disambiguate C++ clang modules from
2208   // C++ standard modules (other than use/non-use of Header Units).
2209   Module *SM = SuggestedModule.getModule();
2210 
2211   bool MaybeTranslateInclude =
2212       Action == Enter && File && SM && !SM->isForBuilding(getLangOpts());
2213 
2214   // Maybe a usable Header Unit
2215   bool UsableHeaderUnit = false;
2216   if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
2217     if (TrackGMFState.inGMF() || IsImportDecl)
2218       UsableHeaderUnit = true;
2219     else if (!IsImportDecl) {
2220       // This is a Header Unit that we do not include-translate
2221       SuggestedModule = ModuleMap::KnownHeader();
2222       SM = nullptr;
2223     }
2224   }
2225   // Maybe a usable clang header module.
2226   bool UsableClangHeaderModule =
2227       (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
2228       !SM->isHeaderUnit();
2229 
2230   // Determine whether we should try to import the module for this #include, if
2231   // there is one. Don't do so if precompiled module support is disabled or we
2232   // are processing this module textually (because we're building the module).
2233   if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2234     // If this include corresponds to a module but that module is
2235     // unavailable, diagnose the situation and bail out.
2236     // FIXME: Remove this; loadModule does the same check (but produces
2237     // slightly worse diagnostics).
2238     if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), getDiagnostics(),
2239                                SuggestedModule.getModule())) {
2240       Diag(FilenameTok.getLocation(),
2241            diag::note_implicit_top_level_module_import_here)
2242           << SuggestedModule.getModule()->getTopLevelModuleName();
2243       return {ImportAction::None};
2244     }
2245 
2246     // Compute the module access path corresponding to this module.
2247     // FIXME: Should we have a second loadModule() overload to avoid this
2248     // extra lookup step?
2249     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2250     for (Module *Mod = SM; Mod; Mod = Mod->Parent)
2251       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2252                                     FilenameTok.getLocation()));
2253     std::reverse(Path.begin(), Path.end());
2254 
2255     // Warn that we're replacing the include/import with a module import.
2256     if (!IsImportDecl)
2257       diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2258 
2259     // Load the module to import its macros. We'll make the declarations
2260     // visible when the parser gets here.
2261     // FIXME: Pass SuggestedModule in here rather than converting it to a path
2262     // and making the module loader convert it back again.
2263     ModuleLoadResult Imported = TheModuleLoader.loadModule(
2264         IncludeTok.getLocation(), Path, Module::Hidden,
2265         /*IsInclusionDirective=*/true);
2266     assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
2267            "the imported module is different than the suggested one");
2268 
2269     if (Imported) {
2270       Action = Import;
2271     } else if (Imported.isMissingExpected()) {
2272       markClangModuleAsAffecting(
2273           static_cast<Module *>(Imported)->getTopLevelModule());
2274       // We failed to find a submodule that we assumed would exist (because it
2275       // was in the directory of an umbrella header, for instance), but no
2276       // actual module containing it exists (because the umbrella header is
2277       // incomplete).  Treat this as a textual inclusion.
2278       SuggestedModule = ModuleMap::KnownHeader();
2279       SM = nullptr;
2280     } else if (Imported.isConfigMismatch()) {
2281       // On a configuration mismatch, enter the header textually. We still know
2282       // that it's part of the corresponding module.
2283     } else {
2284       // We hit an error processing the import. Bail out.
2285       if (hadModuleLoaderFatalFailure()) {
2286         // With a fatal failure in the module loader, we abort parsing.
2287         Token &Result = IncludeTok;
2288         assert(CurLexer && "#include but no current lexer set!");
2289         Result.startToken();
2290         CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2291         CurLexer->cutOffLexing();
2292       }
2293       return {ImportAction::None};
2294     }
2295   }
2296 
2297   // The #included file will be considered to be a system header if either it is
2298   // in a system include directory, or if the #includer is a system include
2299   // header.
2300   SrcMgr::CharacteristicKind FileCharacter =
2301       SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2302   if (File)
2303     FileCharacter = std::max(HeaderInfo.getFileDirFlavor(&File->getFileEntry()),
2304                              FileCharacter);
2305 
2306   // If this is a '#import' or an import-declaration, don't re-enter the file.
2307   //
2308   // FIXME: If we have a suggested module for a '#include', and we've already
2309   // visited this file, don't bother entering it again. We know it has no
2310   // further effect.
2311   bool EnterOnce =
2312       IsImportDecl ||
2313       IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2314 
2315   bool IsFirstIncludeOfFile = false;
2316 
2317   // Ask HeaderInfo if we should enter this #include file.  If not, #including
2318   // this file will have no effect.
2319   if (Action == Enter && File &&
2320       !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
2321                                          EnterOnce, getLangOpts().Modules, SM,
2322                                          IsFirstIncludeOfFile)) {
2323     // C++ standard modules:
2324     // If we are not in the GMF, then we textually include only
2325     // clang modules:
2326     // Even if we've already preprocessed this header once and know that we
2327     // don't need to see its contents again, we still need to import it if it's
2328     // modular because we might not have imported it from this submodule before.
2329     //
2330     // FIXME: We don't do this when compiling a PCH because the AST
2331     // serialization layer can't cope with it. This means we get local
2332     // submodule visibility semantics wrong in that case.
2333     if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2334       Action = TrackGMFState.inGMF() ? Import : Skip;
2335     else
2336       Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
2337   }
2338 
2339   // Check for circular inclusion of the main file.
2340   // We can't generate a consistent preamble with regard to the conditional
2341   // stack if the main file is included again as due to the preamble bounds
2342   // some directives (e.g. #endif of a header guard) will never be seen.
2343   // Since this will lead to confusing errors, avoid the inclusion.
2344   if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2345       SourceMgr.isMainFile(File->getFileEntry())) {
2346     Diag(FilenameTok.getLocation(),
2347          diag::err_pp_including_mainfile_in_preamble);
2348     return {ImportAction::None};
2349   }
2350 
2351   if (Callbacks && !IsImportDecl) {
2352     // Notify the callback object that we've seen an inclusion directive.
2353     // FIXME: Use a different callback for a pp-import?
2354     Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2355                                   FilenameRange, File, SearchPath, RelativePath,
2356                                   Action == Import ? SuggestedModule.getModule()
2357                                                    : nullptr,
2358                                   FileCharacter);
2359     if (Action == Skip && File)
2360       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2361   }
2362 
2363   if (!File)
2364     return {ImportAction::None};
2365 
2366   // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2367   // module corresponding to the named header.
2368   if (IsImportDecl && !SuggestedModule) {
2369     Diag(FilenameTok, diag::err_header_import_not_header_unit)
2370       << OriginalFilename << File->getName();
2371     return {ImportAction::None};
2372   }
2373 
2374   // Issue a diagnostic if the name of the file on disk has a different case
2375   // than the one we're about to open.
2376   const bool CheckIncludePathPortability =
2377       !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2378 
2379   if (CheckIncludePathPortability) {
2380     StringRef Name = LookupFilename;
2381     StringRef NameWithoriginalSlashes = Filename;
2382 #if defined(_WIN32)
2383     // Skip UNC prefix if present. (tryGetRealPathName() always
2384     // returns a path with the prefix skipped.)
2385     bool NameWasUNC = Name.consume_front("\\\\?\\");
2386     NameWithoriginalSlashes.consume_front("\\\\?\\");
2387 #endif
2388     StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2389     SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2390                                           llvm::sys::path::end(Name));
2391 #if defined(_WIN32)
2392     // -Wnonportable-include-path is designed to diagnose includes using
2393     // case even on systems with a case-insensitive file system.
2394     // On Windows, RealPathName always starts with an upper-case drive
2395     // letter for absolute paths, but Name might start with either
2396     // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2397     // ("foo" will always have on-disk case, no matter which case was
2398     // used in the cd command). To not emit this warning solely for
2399     // the drive letter, whose case is dependent on if `cd` is used
2400     // with upper- or lower-case drive letters, always consider the
2401     // given drive letter case as correct for the purpose of this warning.
2402     SmallString<128> FixedDriveRealPath;
2403     if (llvm::sys::path::is_absolute(Name) &&
2404         llvm::sys::path::is_absolute(RealPathName) &&
2405         toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2406         isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2407       assert(Components.size() >= 3 && "should have drive, backslash, name");
2408       assert(Components[0].size() == 2 && "should start with drive");
2409       assert(Components[0][1] == ':' && "should have colon");
2410       FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2411       RealPathName = FixedDriveRealPath;
2412     }
2413 #endif
2414 
2415     if (trySimplifyPath(Components, RealPathName)) {
2416       SmallString<128> Path;
2417       Path.reserve(Name.size()+2);
2418       Path.push_back(isAngled ? '<' : '"');
2419 
2420       const auto IsSep = [BackslashStyle](char c) {
2421         return llvm::sys::path::is_separator(c, BackslashStyle);
2422       };
2423 
2424       for (auto Component : Components) {
2425         // On POSIX, Components will contain a single '/' as first element
2426         // exactly if Name is an absolute path.
2427         // On Windows, it will contain "C:" followed by '\' for absolute paths.
2428         // The drive letter is optional for absolute paths on Windows, but
2429         // clang currently cannot process absolute paths in #include lines that
2430         // don't have a drive.
2431         // If the first entry in Components is a directory separator,
2432         // then the code at the bottom of this loop that keeps the original
2433         // directory separator style copies it. If the second entry is
2434         // a directory separator (the C:\ case), then that separator already
2435         // got copied when the C: was processed and we want to skip that entry.
2436         if (!(Component.size() == 1 && IsSep(Component[0])))
2437           Path.append(Component);
2438         else if (!Path.empty())
2439           continue;
2440 
2441         // Append the separator(s) the user used, or the close quote
2442         if (Path.size() > NameWithoriginalSlashes.size()) {
2443           Path.push_back(isAngled ? '>' : '"');
2444           continue;
2445         }
2446         assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2447         do
2448           Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2449         while (Path.size() <= NameWithoriginalSlashes.size() &&
2450                IsSep(NameWithoriginalSlashes[Path.size()-1]));
2451       }
2452 
2453 #if defined(_WIN32)
2454       // Restore UNC prefix if it was there.
2455       if (NameWasUNC)
2456         Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2457 #endif
2458 
2459       // For user files and known standard headers, issue a diagnostic.
2460       // For other system headers, don't. They can be controlled separately.
2461       auto DiagId =
2462           (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2463               ? diag::pp_nonportable_path
2464               : diag::pp_nonportable_system_path;
2465       Diag(FilenameTok, DiagId) << Path <<
2466         FixItHint::CreateReplacement(FilenameRange, Path);
2467     }
2468   }
2469 
2470   switch (Action) {
2471   case Skip:
2472     // If we don't need to enter the file, stop now.
2473     if (SM)
2474       return {ImportAction::SkippedModuleImport, SM};
2475     return {ImportAction::None};
2476 
2477   case IncludeLimitReached:
2478     // If we reached our include limit and don't want to enter any more files,
2479     // don't go any further.
2480     return {ImportAction::None};
2481 
2482   case Import: {
2483     // If this is a module import, make it visible if needed.
2484     assert(SM && "no module to import");
2485 
2486     makeModuleVisible(SM, EndLoc);
2487 
2488     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2489         tok::pp___include_macros)
2490       return {ImportAction::None};
2491 
2492     return {ImportAction::ModuleImport, SM};
2493   }
2494 
2495   case Enter:
2496     break;
2497   }
2498 
2499   // Check that we don't have infinite #include recursion.
2500   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2501     Diag(FilenameTok, diag::err_pp_include_too_deep);
2502     HasReachedMaxIncludeDepth = true;
2503     return {ImportAction::None};
2504   }
2505 
2506   // Look up the file, create a File ID for it.
2507   SourceLocation IncludePos = FilenameTok.getLocation();
2508   // If the filename string was the result of macro expansions, set the include
2509   // position on the file where it will be included and after the expansions.
2510   if (IncludePos.isMacroID())
2511     IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2512   FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2513   if (!FID.isValid()) {
2514     TheModuleLoader.HadFatalFailure = true;
2515     return ImportAction::Failure;
2516   }
2517 
2518   // If all is good, enter the new file!
2519   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2520                       IsFirstIncludeOfFile))
2521     return {ImportAction::None};
2522 
2523   // Determine if we're switching to building a new submodule, and which one.
2524   // This does not apply for C++20 modules header units.
2525   if (SM && !SM->isHeaderUnit()) {
2526     if (SM->getTopLevelModule()->ShadowingModule) {
2527       // We are building a submodule that belongs to a shadowed module. This
2528       // means we find header files in the shadowed module.
2529       Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
2530           << SM->getFullModuleName();
2531       Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2532            diag::note_previous_definition);
2533       return {ImportAction::None};
2534     }
2535     // When building a pch, -fmodule-name tells the compiler to textually
2536     // include headers in the specified module. We are not building the
2537     // specified module.
2538     //
2539     // FIXME: This is the wrong way to handle this. We should produce a PCH
2540     // that behaves the same as the header would behave in a compilation using
2541     // that PCH, which means we should enter the submodule. We need to teach
2542     // the AST serialization layer to deal with the resulting AST.
2543     if (getLangOpts().CompilingPCH && SM->isForBuilding(getLangOpts()))
2544       return {ImportAction::None};
2545 
2546     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2547     CurLexerSubmodule = SM;
2548 
2549     // Let the macro handling code know that any future macros are within
2550     // the new submodule.
2551     EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
2552 
2553     // Let the parser know that any future declarations are within the new
2554     // submodule.
2555     // FIXME: There's no point doing this if we're handling a #__include_macros
2556     // directive.
2557     return {ImportAction::ModuleBegin, SM};
2558   }
2559 
2560   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2561   return {ImportAction::None};
2562 }
2563 
2564 /// HandleIncludeNextDirective - Implements \#include_next.
2565 ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2566 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2567                                               Token &IncludeNextTok) {
2568   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2569 
2570   ConstSearchDirIterator Lookup = nullptr;
2571   const FileEntry *LookupFromFile;
2572   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2573 
2574   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2575                                 LookupFromFile);
2576 }
2577 
2578 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2579 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2580   // The Microsoft #import directive takes a type library and generates header
2581   // files from it, and includes those.  This is beyond the scope of what clang
2582   // does, so we ignore it and error out.  However, #import can optionally have
2583   // trailing attributes that span multiple lines.  We're going to eat those
2584   // so we can continue processing from there.
2585   Diag(Tok, diag::err_pp_import_directive_ms );
2586 
2587   // Read tokens until we get to the end of the directive.  Note that the
2588   // directive can be split over multiple lines using the backslash character.
2589   DiscardUntilEndOfDirective();
2590 }
2591 
2592 /// HandleImportDirective - Implements \#import.
2593 ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2594 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2595                                          Token &ImportTok) {
2596   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2597     if (LangOpts.MSVCCompat)
2598       return HandleMicrosoftImportDirective(ImportTok);
2599     Diag(ImportTok, diag::ext_pp_import_directive);
2600   }
2601   return HandleIncludeDirective(HashLoc, ImportTok);
2602 }
2603 
2604 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2605 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2606 /// tokens through the preprocessor and discarding them (only keeping the side
2607 /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2608 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2609                                                 Token &IncludeMacrosTok) {
2610   // This directive should only occur in the predefines buffer.  If not, emit an
2611   // error and reject it.
2612   SourceLocation Loc = IncludeMacrosTok.getLocation();
2613   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2614     Diag(IncludeMacrosTok.getLocation(),
2615          diag::pp_include_macros_out_of_predefines);
2616     DiscardUntilEndOfDirective();
2617     return;
2618   }
2619 
2620   // Treat this as a normal #include for checking purposes.  If this is
2621   // successful, it will push a new lexer onto the include stack.
2622   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2623 
2624   Token TmpTok;
2625   do {
2626     Lex(TmpTok);
2627     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2628   } while (TmpTok.isNot(tok::hashhash));
2629 }
2630 
2631 //===----------------------------------------------------------------------===//
2632 // Preprocessor Macro Directive Handling.
2633 //===----------------------------------------------------------------------===//
2634 
2635 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2636 /// definition has just been read.  Lex the rest of the parameters and the
2637 /// closing ), updating MI with what we learn.  Return true if an error occurs
2638 /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2639 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2640   SmallVector<IdentifierInfo*, 32> Parameters;
2641 
2642   while (true) {
2643     LexUnexpandedToken(Tok);
2644     switch (Tok.getKind()) {
2645     case tok::r_paren:
2646       // Found the end of the parameter list.
2647       if (Parameters.empty())  // #define FOO()
2648         return false;
2649       // Otherwise we have #define FOO(A,)
2650       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2651       return true;
2652     case tok::ellipsis:  // #define X(... -> C99 varargs
2653       if (!LangOpts.C99)
2654         Diag(Tok, LangOpts.CPlusPlus11 ?
2655              diag::warn_cxx98_compat_variadic_macro :
2656              diag::ext_variadic_macro);
2657 
2658       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2659       if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2660         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2661       }
2662 
2663       // Lex the token after the identifier.
2664       LexUnexpandedToken(Tok);
2665       if (Tok.isNot(tok::r_paren)) {
2666         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2667         return true;
2668       }
2669       // Add the __VA_ARGS__ identifier as a parameter.
2670       Parameters.push_back(Ident__VA_ARGS__);
2671       MI->setIsC99Varargs();
2672       MI->setParameterList(Parameters, BP);
2673       return false;
2674     case tok::eod:  // #define X(
2675       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2676       return true;
2677     default:
2678       // Handle keywords and identifiers here to accept things like
2679       // #define Foo(for) for.
2680       IdentifierInfo *II = Tok.getIdentifierInfo();
2681       if (!II) {
2682         // #define X(1
2683         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2684         return true;
2685       }
2686 
2687       // If this is already used as a parameter, it is used multiple times (e.g.
2688       // #define X(A,A.
2689       if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2690         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2691         return true;
2692       }
2693 
2694       // Add the parameter to the macro info.
2695       Parameters.push_back(II);
2696 
2697       // Lex the token after the identifier.
2698       LexUnexpandedToken(Tok);
2699 
2700       switch (Tok.getKind()) {
2701       default:          // #define X(A B
2702         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2703         return true;
2704       case tok::r_paren: // #define X(A)
2705         MI->setParameterList(Parameters, BP);
2706         return false;
2707       case tok::comma:  // #define X(A,
2708         break;
2709       case tok::ellipsis:  // #define X(A... -> GCC extension
2710         // Diagnose extension.
2711         Diag(Tok, diag::ext_named_variadic_macro);
2712 
2713         // Lex the token after the identifier.
2714         LexUnexpandedToken(Tok);
2715         if (Tok.isNot(tok::r_paren)) {
2716           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2717           return true;
2718         }
2719 
2720         MI->setIsGNUVarargs();
2721         MI->setParameterList(Parameters, BP);
2722         return false;
2723       }
2724     }
2725   }
2726 }
2727 
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2728 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2729                                    const LangOptions &LOptions) {
2730   if (MI->getNumTokens() == 1) {
2731     const Token &Value = MI->getReplacementToken(0);
2732 
2733     // Macro that is identity, like '#define inline inline' is a valid pattern.
2734     if (MacroName.getKind() == Value.getKind())
2735       return true;
2736 
2737     // Macro that maps a keyword to the same keyword decorated with leading/
2738     // trailing underscores is a valid pattern:
2739     //    #define inline __inline
2740     //    #define inline __inline__
2741     //    #define inline _inline (in MS compatibility mode)
2742     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2743     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2744       if (!II->isKeyword(LOptions))
2745         return false;
2746       StringRef ValueText = II->getName();
2747       StringRef TrimmedValue = ValueText;
2748       if (!ValueText.startswith("__")) {
2749         if (ValueText.startswith("_"))
2750           TrimmedValue = TrimmedValue.drop_front(1);
2751         else
2752           return false;
2753       } else {
2754         TrimmedValue = TrimmedValue.drop_front(2);
2755         if (TrimmedValue.endswith("__"))
2756           TrimmedValue = TrimmedValue.drop_back(2);
2757       }
2758       return TrimmedValue.equals(MacroText);
2759     } else {
2760       return false;
2761     }
2762   }
2763 
2764   // #define inline
2765   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2766                            tok::kw_const) &&
2767          MI->getNumTokens() == 0;
2768 }
2769 
2770 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2771 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2772 // doing so performs certain validity checks including (but not limited to):
2773 //   - # (stringization) is followed by a macro parameter
2774 //
2775 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2776 //  a pointer to a MacroInfo object.
2777 
ReadOptionalMacroParameterListAndBody(const Token & MacroNameTok,const bool ImmediatelyAfterHeaderGuard)2778 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2779     const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2780 
2781   Token LastTok = MacroNameTok;
2782   // Create the new macro.
2783   MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2784 
2785   Token Tok;
2786   LexUnexpandedToken(Tok);
2787 
2788   // Ensure we consume the rest of the macro body if errors occur.
2789   auto _ = llvm::make_scope_exit([&]() {
2790     // The flag indicates if we are still waiting for 'eod'.
2791     if (CurLexer->ParsingPreprocessorDirective)
2792       DiscardUntilEndOfDirective();
2793   });
2794 
2795   // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2796   // within their appropriate context.
2797   VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2798 
2799   // If this is a function-like macro definition, parse the argument list,
2800   // marking each of the identifiers as being used as macro arguments.  Also,
2801   // check other constraints on the first token of the macro body.
2802   if (Tok.is(tok::eod)) {
2803     if (ImmediatelyAfterHeaderGuard) {
2804       // Save this macro information since it may part of a header guard.
2805       CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2806                                         MacroNameTok.getLocation());
2807     }
2808     // If there is no body to this macro, we have no special handling here.
2809   } else if (Tok.hasLeadingSpace()) {
2810     // This is a normal token with leading space.  Clear the leading space
2811     // marker on the first token to get proper expansion.
2812     Tok.clearFlag(Token::LeadingSpace);
2813   } else if (Tok.is(tok::l_paren)) {
2814     // This is a function-like macro definition.  Read the argument list.
2815     MI->setIsFunctionLike();
2816     if (ReadMacroParameterList(MI, LastTok))
2817       return nullptr;
2818 
2819     // If this is a definition of an ISO C/C++ variadic function-like macro (not
2820     // using the GNU named varargs extension) inform our variadic scope guard
2821     // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2822     // allowed only within the definition of a variadic macro.
2823 
2824     if (MI->isC99Varargs()) {
2825       VariadicMacroScopeGuard.enterScope();
2826     }
2827 
2828     // Read the first token after the arg list for down below.
2829     LexUnexpandedToken(Tok);
2830   } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2831     // C99 requires whitespace between the macro definition and the body.  Emit
2832     // a diagnostic for something like "#define X+".
2833     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2834   } else {
2835     // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2836     // first character of a replacement list is not a character required by
2837     // subclause 5.2.1, then there shall be white-space separation between the
2838     // identifier and the replacement list.".  5.2.1 lists this set:
2839     //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2840     // is irrelevant here.
2841     bool isInvalid = false;
2842     if (Tok.is(tok::at)) // @ is not in the list above.
2843       isInvalid = true;
2844     else if (Tok.is(tok::unknown)) {
2845       // If we have an unknown token, it is something strange like "`".  Since
2846       // all of valid characters would have lexed into a single character
2847       // token of some sort, we know this is not a valid case.
2848       isInvalid = true;
2849     }
2850     if (isInvalid)
2851       Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2852     else
2853       Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2854   }
2855 
2856   if (!Tok.is(tok::eod))
2857     LastTok = Tok;
2858 
2859   SmallVector<Token, 16> Tokens;
2860 
2861   // Read the rest of the macro body.
2862   if (MI->isObjectLike()) {
2863     // Object-like macros are very simple, just read their body.
2864     while (Tok.isNot(tok::eod)) {
2865       LastTok = Tok;
2866       Tokens.push_back(Tok);
2867       // Get the next token of the macro.
2868       LexUnexpandedToken(Tok);
2869     }
2870   } else {
2871     // Otherwise, read the body of a function-like macro.  While we are at it,
2872     // check C99 6.10.3.2p1: ensure that # operators are followed by macro
2873     // parameters in function-like macro expansions.
2874 
2875     VAOptDefinitionContext VAOCtx(*this);
2876 
2877     while (Tok.isNot(tok::eod)) {
2878       LastTok = Tok;
2879 
2880       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
2881         Tokens.push_back(Tok);
2882 
2883         if (VAOCtx.isVAOptToken(Tok)) {
2884           // If we're already within a VAOPT, emit an error.
2885           if (VAOCtx.isInVAOpt()) {
2886             Diag(Tok, diag::err_pp_vaopt_nested_use);
2887             return nullptr;
2888           }
2889           // Ensure VAOPT is followed by a '(' .
2890           LexUnexpandedToken(Tok);
2891           if (Tok.isNot(tok::l_paren)) {
2892             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
2893             return nullptr;
2894           }
2895           Tokens.push_back(Tok);
2896           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
2897           LexUnexpandedToken(Tok);
2898           if (Tok.is(tok::hashhash)) {
2899             Diag(Tok, diag::err_vaopt_paste_at_start);
2900             return nullptr;
2901           }
2902           continue;
2903         } else if (VAOCtx.isInVAOpt()) {
2904           if (Tok.is(tok::r_paren)) {
2905             if (VAOCtx.sawClosingParen()) {
2906               assert(Tokens.size() >= 3 &&
2907                      "Must have seen at least __VA_OPT__( "
2908                      "and a subsequent tok::r_paren");
2909               if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
2910                 Diag(Tok, diag::err_vaopt_paste_at_end);
2911                 return nullptr;
2912               }
2913             }
2914           } else if (Tok.is(tok::l_paren)) {
2915             VAOCtx.sawOpeningParen(Tok.getLocation());
2916           }
2917         }
2918         // Get the next token of the macro.
2919         LexUnexpandedToken(Tok);
2920         continue;
2921       }
2922 
2923       // If we're in -traditional mode, then we should ignore stringification
2924       // and token pasting. Mark the tokens as unknown so as not to confuse
2925       // things.
2926       if (getLangOpts().TraditionalCPP) {
2927         Tok.setKind(tok::unknown);
2928         Tokens.push_back(Tok);
2929 
2930         // Get the next token of the macro.
2931         LexUnexpandedToken(Tok);
2932         continue;
2933       }
2934 
2935       if (Tok.is(tok::hashhash)) {
2936         // If we see token pasting, check if it looks like the gcc comma
2937         // pasting extension.  We'll use this information to suppress
2938         // diagnostics later on.
2939 
2940         // Get the next token of the macro.
2941         LexUnexpandedToken(Tok);
2942 
2943         if (Tok.is(tok::eod)) {
2944           Tokens.push_back(LastTok);
2945           break;
2946         }
2947 
2948         if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
2949             Tokens[Tokens.size() - 1].is(tok::comma))
2950           MI->setHasCommaPasting();
2951 
2952         // Things look ok, add the '##' token to the macro.
2953         Tokens.push_back(LastTok);
2954         continue;
2955       }
2956 
2957       // Our Token is a stringization operator.
2958       // Get the next token of the macro.
2959       LexUnexpandedToken(Tok);
2960 
2961       // Check for a valid macro arg identifier or __VA_OPT__.
2962       if (!VAOCtx.isVAOptToken(Tok) &&
2963           (Tok.getIdentifierInfo() == nullptr ||
2964            MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
2965 
2966         // If this is assembler-with-cpp mode, we accept random gibberish after
2967         // the '#' because '#' is often a comment character.  However, change
2968         // the kind of the token to tok::unknown so that the preprocessor isn't
2969         // confused.
2970         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
2971           LastTok.setKind(tok::unknown);
2972           Tokens.push_back(LastTok);
2973           continue;
2974         } else {
2975           Diag(Tok, diag::err_pp_stringize_not_parameter)
2976             << LastTok.is(tok::hashat);
2977           return nullptr;
2978         }
2979       }
2980 
2981       // Things look ok, add the '#' and param name tokens to the macro.
2982       Tokens.push_back(LastTok);
2983 
2984       // If the token following '#' is VAOPT, let the next iteration handle it
2985       // and check it for correctness, otherwise add the token and prime the
2986       // loop with the next one.
2987       if (!VAOCtx.isVAOptToken(Tok)) {
2988         Tokens.push_back(Tok);
2989         LastTok = Tok;
2990 
2991         // Get the next token of the macro.
2992         LexUnexpandedToken(Tok);
2993       }
2994     }
2995     if (VAOCtx.isInVAOpt()) {
2996       assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
2997       Diag(Tok, diag::err_pp_expected_after)
2998         << LastTok.getKind() << tok::r_paren;
2999       Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3000       return nullptr;
3001     }
3002   }
3003   MI->setDefinitionEndLoc(LastTok.getLocation());
3004 
3005   MI->setTokens(Tokens, BP);
3006   return MI;
3007 }
3008 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3009 /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3010 void Preprocessor::HandleDefineDirective(
3011     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3012   ++NumDefined;
3013 
3014   Token MacroNameTok;
3015   bool MacroShadowsKeyword;
3016   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3017 
3018   // Error reading macro name?  If so, diagnostic already issued.
3019   if (MacroNameTok.is(tok::eod))
3020     return;
3021 
3022   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3023   // Issue a final pragma warning if we're defining a macro that was has been
3024   // undefined and is being redefined.
3025   if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3026     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3027 
3028   // If we are supposed to keep comments in #defines, reenable comment saving
3029   // mode.
3030   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3031 
3032   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3033       MacroNameTok, ImmediatelyAfterHeaderGuard);
3034 
3035   if (!MI) return;
3036 
3037   if (MacroShadowsKeyword &&
3038       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3039     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3040   }
3041   // Check that there is no paste (##) operator at the beginning or end of the
3042   // replacement list.
3043   unsigned NumTokens = MI->getNumTokens();
3044   if (NumTokens != 0) {
3045     if (MI->getReplacementToken(0).is(tok::hashhash)) {
3046       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3047       return;
3048     }
3049     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3050       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3051       return;
3052     }
3053   }
3054 
3055   // When skipping just warn about macros that do not match.
3056   if (SkippingUntilPCHThroughHeader) {
3057     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3058     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3059                              /*Syntactic=*/LangOpts.MicrosoftExt))
3060       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3061           << MacroNameTok.getIdentifierInfo();
3062     // Issue the diagnostic but allow the change if msvc extensions are enabled
3063     if (!LangOpts.MicrosoftExt)
3064       return;
3065   }
3066 
3067   // Finally, if this identifier already had a macro defined for it, verify that
3068   // the macro bodies are identical, and issue diagnostics if they are not.
3069   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3070     // Final macros are hard-mode: they always warn. Even if the bodies are
3071     // identical. Even if they are in system headers. Even if they are things we
3072     // would silently allow in the past.
3073     if (MacroNameTok.getIdentifierInfo()->isFinal())
3074       emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3075 
3076     // In Objective-C, ignore attempts to directly redefine the builtin
3077     // definitions of the ownership qualifiers.  It's still possible to
3078     // #undef them.
3079     auto isObjCProtectedMacro = [](const IdentifierInfo *II) -> bool {
3080       return II->isStr("__strong") ||
3081              II->isStr("__weak") ||
3082              II->isStr("__unsafe_unretained") ||
3083              II->isStr("__autoreleasing");
3084     };
3085    if (getLangOpts().ObjC &&
3086         SourceMgr.getFileID(OtherMI->getDefinitionLoc())
3087           == getPredefinesFileID() &&
3088         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3089       // Warn if it changes the tokens.
3090       if ((!getDiagnostics().getSuppressSystemWarnings() ||
3091            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3092           !MI->isIdenticalTo(*OtherMI, *this,
3093                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
3094         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3095       }
3096       assert(!OtherMI->isWarnIfUnused());
3097       return;
3098     }
3099 
3100     // It is very common for system headers to have tons of macro redefinitions
3101     // and for warnings to be disabled in system headers.  If this is the case,
3102     // then don't bother calling MacroInfo::isIdenticalTo.
3103     if (!getDiagnostics().getSuppressSystemWarnings() ||
3104         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3105 
3106       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3107         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3108 
3109       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3110       // C++ [cpp.predefined]p4, but allow it as an extension.
3111       if (OtherMI->isBuiltinMacro())
3112         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3113       // Macros must be identical.  This means all tokens and whitespace
3114       // separation must be the same.  C99 6.10.3p2.
3115       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3116                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3117         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3118           << MacroNameTok.getIdentifierInfo();
3119         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3120       }
3121     }
3122     if (OtherMI->isWarnIfUnused())
3123       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3124   }
3125 
3126   DefMacroDirective *MD =
3127       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3128 
3129   assert(!MI->isUsed());
3130   // If we need warning for not using the macro, add its location in the
3131   // warn-because-unused-macro set. If it gets used it will be removed from set.
3132   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3133       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3134       !MacroExpansionInDirectivesOverride &&
3135       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3136           getPredefinesFileID()) {
3137     MI->setIsWarnIfUnused(true);
3138     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3139   }
3140 
3141   // If the callbacks want to know, tell them about the macro definition.
3142   if (Callbacks)
3143     Callbacks->MacroDefined(MacroNameTok, MD);
3144 
3145   // If we're in MS compatibility mode and the macro being defined is the
3146   // assert macro, implicitly add a macro definition for static_assert to work
3147   // around their broken assert.h header file in C. Only do so if there isn't
3148   // already a static_assert macro defined.
3149   if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3150       MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3151       !isMacroDefined("static_assert")) {
3152     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3153 
3154     Token Tok;
3155     Tok.startToken();
3156     Tok.setKind(tok::kw__Static_assert);
3157     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3158     MI->setTokens({Tok}, BP);
3159     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3160   }
3161 }
3162 
3163 /// HandleUndefDirective - Implements \#undef.
3164 ///
HandleUndefDirective()3165 void Preprocessor::HandleUndefDirective() {
3166   ++NumUndefined;
3167 
3168   Token MacroNameTok;
3169   ReadMacroName(MacroNameTok, MU_Undef);
3170 
3171   // Error reading macro name?  If so, diagnostic already issued.
3172   if (MacroNameTok.is(tok::eod))
3173     return;
3174 
3175   // Check to see if this is the last token on the #undef line.
3176   CheckEndOfDirective("undef");
3177 
3178   // Okay, we have a valid identifier to undef.
3179   auto *II = MacroNameTok.getIdentifierInfo();
3180   auto MD = getMacroDefinition(II);
3181   UndefMacroDirective *Undef = nullptr;
3182 
3183   if (II->isFinal())
3184     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3185 
3186   // If the macro is not defined, this is a noop undef.
3187   if (const MacroInfo *MI = MD.getMacroInfo()) {
3188     if (!MI->isUsed() && MI->isWarnIfUnused())
3189       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3190 
3191     if (MI->isWarnIfUnused())
3192       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3193 
3194     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3195   }
3196 
3197   // If the callbacks want to know, tell them about the macro #undef.
3198   // Note: no matter if the macro was defined or not.
3199   if (Callbacks)
3200     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3201 
3202   if (Undef)
3203     appendMacroDirective(II, Undef);
3204 }
3205 
3206 //===----------------------------------------------------------------------===//
3207 // Preprocessor Conditional Directive Handling.
3208 //===----------------------------------------------------------------------===//
3209 
3210 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3211 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3212 /// true if any tokens have been returned or pp-directives activated before this
3213 /// \#ifndef has been lexed.
3214 ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3215 void Preprocessor::HandleIfdefDirective(Token &Result,
3216                                         const Token &HashToken,
3217                                         bool isIfndef,
3218                                         bool ReadAnyTokensBeforeDirective) {
3219   ++NumIf;
3220   Token DirectiveTok = Result;
3221 
3222   Token MacroNameTok;
3223   ReadMacroName(MacroNameTok);
3224 
3225   // Error reading macro name?  If so, diagnostic already issued.
3226   if (MacroNameTok.is(tok::eod)) {
3227     // Skip code until we get to #endif.  This helps with recovery by not
3228     // emitting an error when the #endif is reached.
3229     SkipExcludedConditionalBlock(HashToken.getLocation(),
3230                                  DirectiveTok.getLocation(),
3231                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
3232     return;
3233   }
3234 
3235   emitMacroExpansionWarnings(MacroNameTok);
3236 
3237   // Check to see if this is the last token on the #if[n]def line.
3238   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3239 
3240   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3241   auto MD = getMacroDefinition(MII);
3242   MacroInfo *MI = MD.getMacroInfo();
3243 
3244   if (CurPPLexer->getConditionalStackDepth() == 0) {
3245     // If the start of a top-level #ifdef and if the macro is not defined,
3246     // inform MIOpt that this might be the start of a proper include guard.
3247     // Otherwise it is some other form of unknown conditional which we can't
3248     // handle.
3249     if (!ReadAnyTokensBeforeDirective && !MI) {
3250       assert(isIfndef && "#ifdef shouldn't reach here");
3251       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3252     } else
3253       CurPPLexer->MIOpt.EnterTopLevelConditional();
3254   }
3255 
3256   // If there is a macro, process it.
3257   if (MI)  // Mark it used.
3258     markMacroAsUsed(MI);
3259 
3260   if (Callbacks) {
3261     if (isIfndef)
3262       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3263     else
3264       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3265   }
3266 
3267   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3268     getSourceManager().isInMainFile(DirectiveTok.getLocation());
3269 
3270   // Should we include the stuff contained by this directive?
3271   if (PPOpts->SingleFileParseMode && !MI) {
3272     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3273     // the directive blocks.
3274     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3275                                      /*wasskip*/false, /*foundnonskip*/false,
3276                                      /*foundelse*/false);
3277   } else if (!MI == isIfndef || RetainExcludedCB) {
3278     // Yes, remember that we are inside a conditional, then lex the next token.
3279     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3280                                      /*wasskip*/false, /*foundnonskip*/true,
3281                                      /*foundelse*/false);
3282   } else {
3283     // No, skip the contents of this block.
3284     SkipExcludedConditionalBlock(HashToken.getLocation(),
3285                                  DirectiveTok.getLocation(),
3286                                  /*Foundnonskip*/ false,
3287                                  /*FoundElse*/ false);
3288   }
3289 }
3290 
3291 /// HandleIfDirective - Implements the \#if directive.
3292 ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3293 void Preprocessor::HandleIfDirective(Token &IfToken,
3294                                      const Token &HashToken,
3295                                      bool ReadAnyTokensBeforeDirective) {
3296   ++NumIf;
3297 
3298   // Parse and evaluate the conditional expression.
3299   IdentifierInfo *IfNDefMacro = nullptr;
3300   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3301   const bool ConditionalTrue = DER.Conditional;
3302   // Lexer might become invalid if we hit code completion point while evaluating
3303   // expression.
3304   if (!CurPPLexer)
3305     return;
3306 
3307   // If this condition is equivalent to #ifndef X, and if this is the first
3308   // directive seen, handle it for the multiple-include optimization.
3309   if (CurPPLexer->getConditionalStackDepth() == 0) {
3310     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3311       // FIXME: Pass in the location of the macro name, not the 'if' token.
3312       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3313     else
3314       CurPPLexer->MIOpt.EnterTopLevelConditional();
3315   }
3316 
3317   if (Callbacks)
3318     Callbacks->If(
3319         IfToken.getLocation(), DER.ExprRange,
3320         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3321 
3322   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3323     getSourceManager().isInMainFile(IfToken.getLocation());
3324 
3325   // Should we include the stuff contained by this directive?
3326   if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3327     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3328     // the directive blocks.
3329     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3330                                      /*foundnonskip*/false, /*foundelse*/false);
3331   } else if (ConditionalTrue || RetainExcludedCB) {
3332     // Yes, remember that we are inside a conditional, then lex the next token.
3333     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3334                                    /*foundnonskip*/true, /*foundelse*/false);
3335   } else {
3336     // No, skip the contents of this block.
3337     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3338                                  /*Foundnonskip*/ false,
3339                                  /*FoundElse*/ false);
3340   }
3341 }
3342 
3343 /// HandleEndifDirective - Implements the \#endif directive.
3344 ///
HandleEndifDirective(Token & EndifToken)3345 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3346   ++NumEndif;
3347 
3348   // Check that this is the whole directive.
3349   CheckEndOfDirective("endif");
3350 
3351   PPConditionalInfo CondInfo;
3352   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3353     // No conditionals on the stack: this is an #endif without an #if.
3354     Diag(EndifToken, diag::err_pp_endif_without_if);
3355     return;
3356   }
3357 
3358   // If this the end of a top-level #endif, inform MIOpt.
3359   if (CurPPLexer->getConditionalStackDepth() == 0)
3360     CurPPLexer->MIOpt.ExitTopLevelConditional();
3361 
3362   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3363          "This code should only be reachable in the non-skipping case!");
3364 
3365   if (Callbacks)
3366     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3367 }
3368 
3369 /// HandleElseDirective - Implements the \#else directive.
3370 ///
HandleElseDirective(Token & Result,const Token & HashToken)3371 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3372   ++NumElse;
3373 
3374   // #else directive in a non-skipping conditional... start skipping.
3375   CheckEndOfDirective("else");
3376 
3377   PPConditionalInfo CI;
3378   if (CurPPLexer->popConditionalLevel(CI)) {
3379     Diag(Result, diag::pp_err_else_without_if);
3380     return;
3381   }
3382 
3383   // If this is a top-level #else, inform the MIOpt.
3384   if (CurPPLexer->getConditionalStackDepth() == 0)
3385     CurPPLexer->MIOpt.EnterTopLevelConditional();
3386 
3387   // If this is a #else with a #else before it, report the error.
3388   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3389 
3390   if (Callbacks)
3391     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3392 
3393   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3394     getSourceManager().isInMainFile(Result.getLocation());
3395 
3396   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3397     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3398     // the directive blocks.
3399     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3400                                      /*foundnonskip*/false, /*foundelse*/true);
3401     return;
3402   }
3403 
3404   // Finally, skip the rest of the contents of this block.
3405   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3406                                /*Foundnonskip*/ true,
3407                                /*FoundElse*/ true, Result.getLocation());
3408 }
3409 
3410 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3411 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3412                                              const Token &HashToken,
3413                                              tok::PPKeywordKind Kind) {
3414   PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3415                        : Kind == tok::pp_elifdef ? PED_Elifdef
3416                                                  : PED_Elifndef;
3417   ++NumElse;
3418 
3419   // Warn if using `#elifdef` & `#elifndef` in not C2x & C++2b mode.
3420   switch (DirKind) {
3421   case PED_Elifdef:
3422   case PED_Elifndef:
3423     unsigned DiagID;
3424     if (LangOpts.CPlusPlus)
3425       DiagID = LangOpts.CPlusPlus2b ? diag::warn_cxx2b_compat_pp_directive
3426                                     : diag::ext_cxx2b_pp_directive;
3427     else
3428       DiagID = LangOpts.C2x ? diag::warn_c2x_compat_pp_directive
3429                             : diag::ext_c2x_pp_directive;
3430     Diag(ElifToken, DiagID) << DirKind;
3431     break;
3432   default:
3433     break;
3434   }
3435 
3436   // #elif directive in a non-skipping conditional... start skipping.
3437   // We don't care what the condition is, because we will always skip it (since
3438   // the block immediately before it was included).
3439   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3440 
3441   PPConditionalInfo CI;
3442   if (CurPPLexer->popConditionalLevel(CI)) {
3443     Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3444     return;
3445   }
3446 
3447   // If this is a top-level #elif, inform the MIOpt.
3448   if (CurPPLexer->getConditionalStackDepth() == 0)
3449     CurPPLexer->MIOpt.EnterTopLevelConditional();
3450 
3451   // If this is a #elif with a #else before it, report the error.
3452   if (CI.FoundElse)
3453     Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3454 
3455   if (Callbacks) {
3456     switch (Kind) {
3457     case tok::pp_elif:
3458       Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3459                       PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3460       break;
3461     case tok::pp_elifdef:
3462       Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3463       break;
3464     case tok::pp_elifndef:
3465       Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3466       break;
3467     default:
3468       assert(false && "unexpected directive kind");
3469       break;
3470     }
3471   }
3472 
3473   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3474     getSourceManager().isInMainFile(ElifToken.getLocation());
3475 
3476   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3477     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3478     // the directive blocks.
3479     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3480                                      /*foundnonskip*/false, /*foundelse*/false);
3481     return;
3482   }
3483 
3484   // Finally, skip the rest of the contents of this block.
3485   SkipExcludedConditionalBlock(
3486       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3487       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3488 }
3489