1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/DirectoryEntry.h"
16 #include "clang/Basic/FileManager.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/Module.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/CodeCompletionHandler.h"
24 #include "clang/Lex/HeaderSearch.h"
25 #include "clang/Lex/HeaderSearchOptions.h"
26 #include "clang/Lex/LexDiagnostic.h"
27 #include "clang/Lex/LiteralSupport.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/Pragma.h"
33 #include "clang/Lex/Preprocessor.h"
34 #include "clang/Lex/PreprocessorOptions.h"
35 #include "clang/Lex/Token.h"
36 #include "clang/Lex/VariadicMacroSupport.h"
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/ScopeExit.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/ADT/StringSwitch.h"
44 #include "llvm/Support/AlignOf.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/SaveAndRestore.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <cstring>
51 #include <new>
52 #include <optional>
53 #include <string>
54 #include <utility>
55 
56 using namespace clang;
57 
58 //===----------------------------------------------------------------------===//
59 // Utility Methods for Preprocessor Directive Handling.
60 //===----------------------------------------------------------------------===//
61 
AllocateMacroInfo(SourceLocation L)62 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
63   static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
64   return new (BP) MacroInfo(L);
65 }
66 
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)67 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
68                                                            SourceLocation Loc) {
69   return new (BP) DefMacroDirective(MI, Loc);
70 }
71 
72 UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)73 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
74   return new (BP) UndefMacroDirective(UndefLoc);
75 }
76 
77 VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)78 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
79                                                bool isPublic) {
80   return new (BP) VisibilityMacroDirective(Loc, isPublic);
81 }
82 
83 /// Read and discard all tokens remaining on the current line until
84 /// the tok::eod token is found.
DiscardUntilEndOfDirective()85 SourceRange Preprocessor::DiscardUntilEndOfDirective() {
86   Token Tmp;
87   SourceRange Res;
88 
89   LexUnexpandedToken(Tmp);
90   Res.setBegin(Tmp.getLocation());
91   while (Tmp.isNot(tok::eod)) {
92     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
93     LexUnexpandedToken(Tmp);
94   }
95   Res.setEnd(Tmp.getLocation());
96   return Res;
97 }
98 
/// Outcome of checking whether a \#define / \#undef targets a reserved
/// identifier or a keyword.
enum MacroDiag {
  /// Not a reserved identifier.
  MD_NoWarn = 0,
  /// Macro hides a keyword; the warning is enabled by default.
  MD_KeywordDef = 1,
  /// \#define or \#undef of a reserved identifier; the warning is disabled by
  /// default.
  MD_ReservedMacro = 2
};
105 
/// Index passed to the %select in the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics to pick the directive spelling.
///
/// The numeric values are spelled out because they are streamed into the
/// diagnostic as the %select index.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
113 
isFeatureTestMacro(StringRef MacroName)114 static bool isFeatureTestMacro(StringRef MacroName) {
115   // list from:
116   // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117   // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118   // * man 7 feature_test_macros
119   // The list must be sorted for correct binary search.
120   static constexpr StringRef ReservedMacro[] = {
121       "_ATFILE_SOURCE",
122       "_BSD_SOURCE",
123       "_CRT_NONSTDC_NO_WARNINGS",
124       "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125       "_CRT_SECURE_NO_WARNINGS",
126       "_FILE_OFFSET_BITS",
127       "_FORTIFY_SOURCE",
128       "_GLIBCXX_ASSERTIONS",
129       "_GLIBCXX_CONCEPT_CHECKS",
130       "_GLIBCXX_DEBUG",
131       "_GLIBCXX_DEBUG_PEDANTIC",
132       "_GLIBCXX_PARALLEL",
133       "_GLIBCXX_PARALLEL_ASSERTIONS",
134       "_GLIBCXX_SANITIZE_VECTOR",
135       "_GLIBCXX_USE_CXX11_ABI",
136       "_GLIBCXX_USE_DEPRECATED",
137       "_GNU_SOURCE",
138       "_ISOC11_SOURCE",
139       "_ISOC95_SOURCE",
140       "_ISOC99_SOURCE",
141       "_LARGEFILE64_SOURCE",
142       "_POSIX_C_SOURCE",
143       "_REENTRANT",
144       "_SVID_SOURCE",
145       "_THREAD_SAFE",
146       "_XOPEN_SOURCE",
147       "_XOPEN_SOURCE_EXTENDED",
148       "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149       "__STDC_FORMAT_MACROS",
150   };
151   return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
152                             MacroName);
153 }
154 
isLanguageDefinedBuiltin(const SourceManager & SourceMgr,const MacroInfo * MI,const StringRef MacroName)155 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
156                                      const MacroInfo *MI,
157                                      const StringRef MacroName) {
158   // If this is a macro with special handling (like __LINE__) then it's language
159   // defined.
160   if (MI->isBuiltinMacro())
161     return true;
162   // Builtin macros are defined in the builtin file
163   if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
164     return false;
165   // C defines macros starting with __STDC, and C++ defines macros starting with
166   // __STDCPP
167   if (MacroName.starts_with("__STDC"))
168     return true;
169   // C++ defines the __cplusplus macro
170   if (MacroName == "__cplusplus")
171     return true;
172   // C++ defines various feature-test macros starting with __cpp
173   if (MacroName.starts_with("__cpp"))
174     return true;
175   // Anything else isn't language-defined
176   return false;
177 }
178 
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)179 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
180   const LangOptions &Lang = PP.getLangOpts();
181   StringRef Text = II->getName();
182   if (isReservedInAllContexts(II->isReserved(Lang)))
183     return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
184   if (II->isKeyword(Lang))
185     return MD_KeywordDef;
186   if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final")))
187     return MD_KeywordDef;
188   return MD_NoWarn;
189 }
190 
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)191 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
192   const LangOptions &Lang = PP.getLangOpts();
193   // Do not warn on keyword undef.  It is generally harmless and widely used.
194   if (isReservedInAllContexts(II->isReserved(Lang)))
195     return MD_ReservedMacro;
196   return MD_NoWarn;
197 }
198 
199 // Return true if we want to issue a diagnostic by default if we
200 // encounter this name in a #include with the wrong case. For now,
201 // this includes the standard C and C++ headers, Posix headers,
202 // and Boost headers. Improper case for these #includes is a
203 // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)204 static bool warnByDefaultOnWrongCase(StringRef Include) {
205   // If the first component of the path is "boost", treat this like a standard header
206   // for the purposes of diagnostics.
207   if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
208     return true;
209 
210   // "condition_variable" is the longest standard header name at 18 characters.
211   // If the include file name is longer than that, it can't be a standard header.
212   static const size_t MaxStdHeaderNameLen = 18u;
213   if (Include.size() > MaxStdHeaderNameLen)
214     return false;
215 
216   // Lowercase and normalize the search string.
217   SmallString<32> LowerInclude{Include};
218   for (char &Ch : LowerInclude) {
219     // In the ASCII range?
220     if (static_cast<unsigned char>(Ch) > 0x7f)
221       return false; // Can't be a standard header
222     // ASCII lowercase:
223     if (Ch >= 'A' && Ch <= 'Z')
224       Ch += 'a' - 'A';
225     // Normalize path separators for comparison purposes.
226     else if (::llvm::sys::path::is_separator(Ch))
227       Ch = '/';
228   }
229 
230   // The standard C/C++ and Posix headers
231   return llvm::StringSwitch<bool>(LowerInclude)
232     // C library headers
233     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
234     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
235     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
236     .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
237     .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
238     .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
239     .Cases("wchar.h", "wctype.h", true)
240 
241     // C++ headers for C library facilities
242     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
243     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
244     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
245     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
246     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
247     .Case("cwctype", true)
248 
249     // C++ library headers
250     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
251     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
252     .Cases("atomic", "future", "map", "set", "type_traits", true)
253     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
254     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
255     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
256     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
257     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
258     .Cases("deque", "istream", "queue", "string", "valarray", true)
259     .Cases("exception", "iterator", "random", "strstream", "vector", true)
260     .Cases("forward_list", "limits", "ratio", "system_error", true)
261 
262     // POSIX headers (which aren't also C headers)
263     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
264     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
265     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
266     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
267     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
268     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
269     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
270     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
271     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
272     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
273     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
274     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
275     .Default(false);
276 }
277 
278 /// Find a similar string in `Candidates`.
279 ///
280 /// \param LHS a string for a similar string in `Candidates`
281 ///
282 /// \param Candidates the candidates to find a similar string.
283 ///
284 /// \returns a similar string if exists. If no similar string exists,
285 /// returns std::nullopt.
286 static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)287 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
288   // We need to check if `Candidates` has the exact case-insensitive string
289   // because the Levenshtein distance match does not care about it.
290   for (StringRef C : Candidates) {
291     if (LHS.equals_insensitive(C)) {
292       return C;
293     }
294   }
295 
296   // Keep going with the Levenshtein distance match.
297   // If the LHS size is less than 3, use the LHS size minus 1 and if not,
298   // use the LHS size divided by 3.
299   size_t Length = LHS.size();
300   size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
301 
302   std::optional<std::pair<StringRef, size_t>> SimilarStr;
303   for (StringRef C : Candidates) {
304     size_t CurDist = LHS.edit_distance(C, true);
305     if (CurDist <= MaxDist) {
306       if (!SimilarStr) {
307         // The first similar string found.
308         SimilarStr = {C, CurDist};
309       } else if (CurDist < SimilarStr->second) {
310         // More similar string found.
311         SimilarStr = {C, CurDist};
312       }
313     }
314   }
315 
316   if (SimilarStr) {
317     return SimilarStr->first;
318   } else {
319     return std::nullopt;
320   }
321 }
322 
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)323 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
324                                   bool *ShadowFlag) {
325   // Missing macro name?
326   if (MacroNameTok.is(tok::eod))
327     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
328 
329   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
330   if (!II)
331     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
332 
333   if (II->isCPlusPlusOperatorKeyword()) {
334     // C++ 2.5p2: Alternative tokens behave the same as its primary token
335     // except for their spellings.
336     Diag(MacroNameTok, getLangOpts().MicrosoftExt
337                            ? diag::ext_pp_operator_used_as_macro_name
338                            : diag::err_pp_operator_used_as_macro_name)
339         << II << MacroNameTok.getKind();
340     // Allow #defining |and| and friends for Microsoft compatibility or
341     // recovery when legacy C headers are included in C++.
342   }
343 
344   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
345     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
346     return Diag(MacroNameTok, diag::err_defined_macro_name);
347   }
348 
349   // If defining/undefining reserved identifier or a keyword, we need to issue
350   // a warning.
351   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
352   if (ShadowFlag)
353     *ShadowFlag = false;
354   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
355       (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
356     MacroDiag D = MD_NoWarn;
357     if (isDefineUndef == MU_Define) {
358       D = shouldWarnOnMacroDef(*this, II);
359     }
360     else if (isDefineUndef == MU_Undef)
361       D = shouldWarnOnMacroUndef(*this, II);
362     if (D == MD_KeywordDef) {
363       // We do not want to warn on some patterns widely used in configuration
364       // scripts.  This requires analyzing next tokens, so do not issue warnings
365       // now, only inform caller.
366       if (ShadowFlag)
367         *ShadowFlag = true;
368     }
369     if (D == MD_ReservedMacro)
370       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
371   }
372 
373   // Okay, we got a good identifier.
374   return false;
375 }
376 
377 /// Lex and validate a macro name, which occurs after a
378 /// \#define or \#undef.
379 ///
380 /// This sets the token kind to eod and discards the rest of the macro line if
381 /// the macro name is invalid.
382 ///
383 /// \param MacroNameTok Token that is expected to be a macro name.
384 /// \param isDefineUndef Context in which macro is used.
385 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)386 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
387                                  bool *ShadowFlag) {
388   // Read the token, don't allow macro expansion on it.
389   LexUnexpandedToken(MacroNameTok);
390 
391   if (MacroNameTok.is(tok::code_completion)) {
392     if (CodeComplete)
393       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
394     setCodeCompletionReached();
395     LexUnexpandedToken(MacroNameTok);
396   }
397 
398   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
399     return;
400 
401   // Invalid macro name, read and discard the rest of the line and set the
402   // token kind to tok::eod if necessary.
403   if (MacroNameTok.isNot(tok::eod)) {
404     MacroNameTok.setKind(tok::eod);
405     DiscardUntilEndOfDirective();
406   }
407 }
408 
409 /// Ensure that the next token is a tok::eod token.
410 ///
411 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
412 /// true, then we consider macros that expand to zero tokens as being ok.
413 ///
414 /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)415 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
416                                                  bool EnableMacros) {
417   Token Tmp;
418   // Lex unexpanded tokens for most directives: macros might expand to zero
419   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
420   // #line) allow empty macros.
421   if (EnableMacros)
422     Lex(Tmp);
423   else
424     LexUnexpandedToken(Tmp);
425 
426   // There should be no tokens after the directive, but we allow them as an
427   // extension.
428   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
429     LexUnexpandedToken(Tmp);
430 
431   if (Tmp.is(tok::eod))
432     return Tmp.getLocation();
433 
434   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
435   // or if this is a macro-style preprocessing directive, because it is more
436   // trouble than it is worth to insert /**/ and check that there is no /**/
437   // in the range also.
438   FixItHint Hint;
439   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
440       !CurTokenLexer)
441     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
442   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
443   return DiscardUntilEndOfDirective().getEnd();
444 }
445 
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const446 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
447                                           StringRef Directive) const {
448   // If this is a `.S` file, treat unknown # directives as non-preprocessor
449   // directives.
450   if (getLangOpts().AsmPreprocessor) return;
451 
452   std::vector<StringRef> Candidates = {
453       "if", "ifdef", "ifndef", "elif", "else", "endif"
454   };
455   if (LangOpts.C23 || LangOpts.CPlusPlus23)
456     Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
457 
458   if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
459     // Directive cannot be coming from macro.
460     assert(Tok.getLocation().isFileID());
461     CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
462         Tok.getLocation(),
463         Tok.getLocation().getLocWithOffset(Directive.size()));
464     StringRef SuggValue = *Sugg;
465 
466     auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
467     Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
468   }
469 }
470 
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
/// decided that the subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.  If
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered.
/// If FoundElse is true, then we have already seen a \#else for this
/// conditional, so another \#else directive is a duplicate.  When this
/// returns, the caller can lex the first valid token.
SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,SourceLocation IfTokenLoc,bool FoundNonSkipPortion,bool FoundElse,SourceLocation ElseLoc)479 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
480                                                 SourceLocation IfTokenLoc,
481                                                 bool FoundNonSkipPortion,
482                                                 bool FoundElse,
483                                                 SourceLocation ElseLoc) {
484   // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
485   // not getting called recursively by storing the RecordedSkippedRanges
486   // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
487   // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
488   // invalidated. If this changes and there is a need to call
489   // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
490   // change to do a second lookup in endLexPass function instead of reusing the
491   // lookup pointer.
492   assert(!SkippingExcludedConditionalBlock &&
493          "calling SkipExcludedConditionalBlock recursively");
494   llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
495 
496   ++NumSkipped;
497   assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
498   assert(CurPPLexer && "Conditional PP block must be in a file!");
499   assert(CurLexer && "Conditional PP block but no current lexer set!");
500 
501   if (PreambleConditionalStack.reachedEOFWhileSkipping())
502     PreambleConditionalStack.clearSkipInfo();
503   else
504     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
505                                      FoundNonSkipPortion, FoundElse);
506 
507   // Enter raw mode to disable identifier lookup (and thus macro expansion),
508   // disabling warnings, etc.
509   CurPPLexer->LexingRawMode = true;
510   Token Tok;
511   SourceLocation endLoc;
512 
513   /// Keeps track and caches skipped ranges and also retrieves a prior skipped
514   /// range if the same block is re-visited.
515   struct SkippingRangeStateTy {
516     Preprocessor &PP;
517 
518     const char *BeginPtr = nullptr;
519     unsigned *SkipRangePtr = nullptr;
520 
521     SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
522 
523     void beginLexPass() {
524       if (BeginPtr)
525         return; // continue skipping a block.
526 
527       // Initiate a skipping block and adjust the lexer if we already skipped it
528       // before.
529       BeginPtr = PP.CurLexer->getBufferLocation();
530       SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
531       if (*SkipRangePtr) {
532         PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
533                           /*IsAtStartOfLine*/ true);
534       }
535     }
536 
537     void endLexPass(const char *Hashptr) {
538       if (!BeginPtr) {
539         // Not doing normal lexing.
540         assert(PP.CurLexer->isDependencyDirectivesLexer());
541         return;
542       }
543 
544       // Finished skipping a block, record the range if it's first time visited.
545       if (!*SkipRangePtr) {
546         *SkipRangePtr = Hashptr - BeginPtr;
547       }
548       assert(*SkipRangePtr == Hashptr - BeginPtr);
549       BeginPtr = nullptr;
550       SkipRangePtr = nullptr;
551     }
552   } SkippingRangeState(*this);
553 
554   while (true) {
555     if (CurLexer->isDependencyDirectivesLexer()) {
556       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
557     } else {
558       SkippingRangeState.beginLexPass();
559       while (true) {
560         CurLexer->Lex(Tok);
561 
562         if (Tok.is(tok::code_completion)) {
563           setCodeCompletionReached();
564           if (CodeComplete)
565             CodeComplete->CodeCompleteInConditionalExclusion();
566           continue;
567         }
568 
569         // If this is the end of the buffer, we have an error.
570         if (Tok.is(tok::eof)) {
571           // We don't emit errors for unterminated conditionals here,
572           // Lexer::LexEndOfFile can do that properly.
573           // Just return and let the caller lex after this #include.
574           if (PreambleConditionalStack.isRecording())
575             PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
576                                                       FoundNonSkipPortion,
577                                                       FoundElse, ElseLoc);
578           break;
579         }
580 
581         // If this token is not a preprocessor directive, just skip it.
582         if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
583           continue;
584 
585         break;
586       }
587     }
588     if (Tok.is(tok::eof))
589       break;
590 
591     // We just parsed a # character at the start of a line, so we're in
592     // directive mode.  Tell the lexer this so any newlines we see will be
593     // converted into an EOD token (this terminates the macro).
594     CurPPLexer->ParsingPreprocessorDirective = true;
595     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
596 
597     assert(Tok.is(tok::hash));
598     const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
599     assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
600 
601     // Read the next token, the directive flavor.
602     LexUnexpandedToken(Tok);
603 
604     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
605     // something bogus), skip it.
606     if (Tok.isNot(tok::raw_identifier)) {
607       CurPPLexer->ParsingPreprocessorDirective = false;
608       // Restore comment saving mode.
609       if (CurLexer) CurLexer->resetExtendedTokenMode();
610       continue;
611     }
612 
    // If the first letter isn't i or e, it isn't interesting to us.  We know that
614     // this is safe in the face of spelling differences, because there is no way
615     // to spell an i/e in a strange way that is another letter.  Skipping this
616     // allows us to avoid looking up the identifier info for #define/#undef and
617     // other common directives.
618     StringRef RI = Tok.getRawIdentifier();
619 
620     char FirstChar = RI[0];
621     if (FirstChar >= 'a' && FirstChar <= 'z' &&
622         FirstChar != 'i' && FirstChar != 'e') {
623       CurPPLexer->ParsingPreprocessorDirective = false;
624       // Restore comment saving mode.
625       if (CurLexer) CurLexer->resetExtendedTokenMode();
626       continue;
627     }
628 
629     // Get the identifier name without trigraphs or embedded newlines.  Note
630     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
631     // when skipping.
632     char DirectiveBuf[20];
633     StringRef Directive;
634     if (!Tok.needsCleaning() && RI.size() < 20) {
635       Directive = RI;
636     } else {
637       std::string DirectiveStr = getSpelling(Tok);
638       size_t IdLen = DirectiveStr.size();
639       if (IdLen >= 20) {
640         CurPPLexer->ParsingPreprocessorDirective = false;
641         // Restore comment saving mode.
642         if (CurLexer) CurLexer->resetExtendedTokenMode();
643         continue;
644       }
645       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
646       Directive = StringRef(DirectiveBuf, IdLen);
647     }
648 
649     if (Directive.starts_with("if")) {
650       StringRef Sub = Directive.substr(2);
651       if (Sub.empty() ||   // "if"
652           Sub == "def" ||   // "ifdef"
653           Sub == "ndef") {  // "ifndef"
654         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
655         // bother parsing the condition.
656         DiscardUntilEndOfDirective();
657         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
658                                        /*foundnonskip*/false,
659                                        /*foundelse*/false);
660       } else {
661         SuggestTypoedDirective(Tok, Directive);
662       }
663     } else if (Directive[0] == 'e') {
664       StringRef Sub = Directive.substr(1);
665       if (Sub == "ndif") {  // "endif"
666         PPConditionalInfo CondInfo;
667         CondInfo.WasSkipping = true; // Silence bogus warning.
668         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
669         (void)InCond;  // Silence warning in no-asserts mode.
670         assert(!InCond && "Can't be skipping if not in a conditional!");
671 
672         // If we popped the outermost skipping block, we're done skipping!
673         if (!CondInfo.WasSkipping) {
674           SkippingRangeState.endLexPass(Hashptr);
675           // Restore the value of LexingRawMode so that trailing comments
676           // are handled correctly, if we've reached the outermost block.
677           CurPPLexer->LexingRawMode = false;
678           endLoc = CheckEndOfDirective("endif");
679           CurPPLexer->LexingRawMode = true;
680           if (Callbacks)
681             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
682           break;
683         } else {
684           DiscardUntilEndOfDirective();
685         }
686       } else if (Sub == "lse") { // "else".
687         // #else directive in a skipping conditional.  If not in some other
688         // skipping conditional, and if #else hasn't already been seen, enter it
689         // as a non-skipping conditional.
690         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
691 
692         if (!CondInfo.WasSkipping)
693           SkippingRangeState.endLexPass(Hashptr);
694 
695         // If this is a #else with a #else before it, report the error.
696         if (CondInfo.FoundElse)
697           Diag(Tok, diag::pp_err_else_after_else);
698 
699         // Note that we've seen a #else in this conditional.
700         CondInfo.FoundElse = true;
701 
702         // If the conditional is at the top level, and the #if block wasn't
703         // entered, enter the #else block now.
704         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
705           CondInfo.FoundNonSkip = true;
706           // Restore the value of LexingRawMode so that trailing comments
707           // are handled correctly.
708           CurPPLexer->LexingRawMode = false;
709           endLoc = CheckEndOfDirective("else");
710           CurPPLexer->LexingRawMode = true;
711           if (Callbacks)
712             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
713           break;
714         } else {
715           DiscardUntilEndOfDirective();  // C99 6.10p4.
716         }
717       } else if (Sub == "lif") {  // "elif".
718         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
719 
720         if (!CondInfo.WasSkipping)
721           SkippingRangeState.endLexPass(Hashptr);
722 
723         // If this is a #elif with a #else before it, report the error.
724         if (CondInfo.FoundElse)
725           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
726 
727         // If this is in a skipping block or if we're already handled this #if
728         // block, don't bother parsing the condition.
729         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
730           // FIXME: We should probably do at least some minimal parsing of the
731           // condition to verify that it is well-formed. The current state
732           // allows #elif* directives with completely malformed (or missing)
733           // conditions.
734           DiscardUntilEndOfDirective();
735         } else {
736           // Restore the value of LexingRawMode so that identifiers are
737           // looked up, etc, inside the #elif expression.
738           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
739           CurPPLexer->LexingRawMode = false;
740           IdentifierInfo *IfNDefMacro = nullptr;
741           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
742           // Stop if Lexer became invalid after hitting code completion token.
743           if (!CurPPLexer)
744             return;
745           const bool CondValue = DER.Conditional;
746           CurPPLexer->LexingRawMode = true;
747           if (Callbacks) {
748             Callbacks->Elif(
749                 Tok.getLocation(), DER.ExprRange,
750                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
751                 CondInfo.IfLoc);
752           }
753           // If this condition is true, enter it!
754           if (CondValue) {
755             CondInfo.FoundNonSkip = true;
756             break;
757           }
758         }
759       } else if (Sub == "lifdef" ||  // "elifdef"
760                  Sub == "lifndef") { // "elifndef"
761         bool IsElifDef = Sub == "lifdef";
762         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
763         Token DirectiveToken = Tok;
764 
765         if (!CondInfo.WasSkipping)
766           SkippingRangeState.endLexPass(Hashptr);
767 
768         // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
769         // if this branch is in a skipping block.
770         unsigned DiagID;
771         if (LangOpts.CPlusPlus)
772           DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
773                                         : diag::ext_cxx23_pp_directive;
774         else
775           DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
776                                 : diag::ext_c23_pp_directive;
777         Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
778 
779         // If this is a #elif with a #else before it, report the error.
780         if (CondInfo.FoundElse)
781           Diag(Tok, diag::pp_err_elif_after_else)
782               << (IsElifDef ? PED_Elifdef : PED_Elifndef);
783 
784         // If this is in a skipping block or if we're already handled this #if
785         // block, don't bother parsing the condition.
786         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
787           // FIXME: We should probably do at least some minimal parsing of the
788           // condition to verify that it is well-formed. The current state
789           // allows #elif* directives with completely malformed (or missing)
790           // conditions.
791           DiscardUntilEndOfDirective();
792         } else {
793           // Restore the value of LexingRawMode so that identifiers are
794           // looked up, etc, inside the #elif[n]def expression.
795           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
796           CurPPLexer->LexingRawMode = false;
797           Token MacroNameTok;
798           ReadMacroName(MacroNameTok);
799           CurPPLexer->LexingRawMode = true;
800 
801           // If the macro name token is tok::eod, there was an error that was
802           // already reported.
803           if (MacroNameTok.is(tok::eod)) {
804             // Skip code until we get to #endif.  This helps with recovery by
805             // not emitting an error when the #endif is reached.
806             continue;
807           }
808 
809           emitMacroExpansionWarnings(MacroNameTok);
810 
811           CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
812 
813           IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
814           auto MD = getMacroDefinition(MII);
815           MacroInfo *MI = MD.getMacroInfo();
816 
817           if (Callbacks) {
818             if (IsElifDef) {
819               Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
820                                  MD);
821             } else {
822               Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
823                                   MD);
824             }
825           }
826           // If this condition is true, enter it!
827           if (static_cast<bool>(MI) == IsElifDef) {
828             CondInfo.FoundNonSkip = true;
829             break;
830           }
831         }
832       } else {
833         SuggestTypoedDirective(Tok, Directive);
834       }
835     } else {
836       SuggestTypoedDirective(Tok, Directive);
837     }
838 
839     CurPPLexer->ParsingPreprocessorDirective = false;
840     // Restore comment saving mode.
841     if (CurLexer) CurLexer->resetExtendedTokenMode();
842   }
843 
844   // Finally, if we are out of the conditional (saw an #endif or ran off the end
845   // of the file, just stop skipping and return to lexing whatever came after
846   // the #if block.
847   CurPPLexer->LexingRawMode = false;
848 
849   // The last skipped range isn't actually skipped yet if it's truncated
850   // by the end of the preamble; we'll resume parsing after the preamble.
851   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
852     Callbacks->SourceRangeSkipped(
853         SourceRange(HashTokenLoc, endLoc.isValid()
854                                       ? endLoc
855                                       : CurPPLexer->getSourceLocation()),
856         Tok.getLocation());
857 }
858 
getModuleForLocation(SourceLocation Loc,bool AllowTextual)859 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
860                                            bool AllowTextual) {
861   if (!SourceMgr.isInMainFile(Loc)) {
862     // Try to determine the module of the include directive.
863     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
864     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
865     if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
866       // The include comes from an included file.
867       return HeaderInfo.getModuleMap()
868           .findModuleForHeader(*EntryOfIncl, AllowTextual)
869           .getModule();
870     }
871   }
872 
873   // This is either in the main file or not in a file at all. It belongs
874   // to the current module, if there is one.
875   return getLangOpts().CurrentModule.empty()
876              ? nullptr
877              : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
878 }
879 
880 OptionalFileEntryRef
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)881 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
882                                                SourceLocation Loc) {
883   Module *IncM = getModuleForLocation(
884       IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
885 
886   // Walk up through the include stack, looking through textual headers of M
887   // until we hit a non-textual header that we can #include. (We assume textual
888   // headers of a module with non-textual headers aren't meant to be used to
889   // import entities from the module.)
890   auto &SM = getSourceManager();
891   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
892     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
893     auto FE = SM.getFileEntryRefForID(ID);
894     if (!FE)
895       break;
896 
897     // We want to find all possible modules that might contain this header, so
898     // search all enclosing directories for module maps and load them.
899     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
900                             SourceMgr.isInSystemHeader(Loc));
901 
902     bool InPrivateHeader = false;
903     for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
904       if (!Header.isAccessibleFrom(IncM)) {
905         // It's in a private header; we can't #include it.
906         // FIXME: If there's a public header in some module that re-exports it,
907         // then we could suggest including that, but it's not clear that's the
908         // expected way to make this entity visible.
909         InPrivateHeader = true;
910         continue;
911       }
912 
913       // Don't suggest explicitly excluded headers.
914       if (Header.getRole() == ModuleMap::ExcludedHeader)
915         continue;
916 
917       // We'll suggest including textual headers below if they're
918       // include-guarded.
919       if (Header.getRole() & ModuleMap::TextualHeader)
920         continue;
921 
922       // If we have a module import syntax, we shouldn't include a header to
923       // make a particular module visible. Let the caller know they should
924       // suggest an import instead.
925       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
926         return std::nullopt;
927 
928       // If this is an accessible, non-textual header of M's top-level module
929       // that transitively includes the given location and makes the
930       // corresponding module visible, this is the thing to #include.
931       return *FE;
932     }
933 
934     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
935     // an import either.
936     if (InPrivateHeader)
937       return std::nullopt;
938 
939     // If the header is includable and has an include guard, assume the
940     // intended way to expose its contents is by #include, not by importing a
941     // module that transitively includes it.
942     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
943       return *FE;
944 
945     Loc = SM.getIncludeLoc(ID);
946   }
947 
948   return std::nullopt;
949 }
950 
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)951 OptionalFileEntryRef Preprocessor::LookupFile(
952     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
953     ConstSearchDirIterator FromDir, const FileEntry *FromFile,
954     ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
955     SmallVectorImpl<char> *RelativePath,
956     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
957     bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
958   ConstSearchDirIterator CurDirLocal = nullptr;
959   ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
960 
961   Module *RequestingModule = getModuleForLocation(
962       FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
963 
964   // If the header lookup mechanism may be relative to the current inclusion
965   // stack, record the parent #includes.
966   SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
967   bool BuildSystemModule = false;
968   if (!FromDir && !FromFile) {
969     FileID FID = getCurrentFileLexer()->getFileID();
970     OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
971 
972     // If there is no file entry associated with this file, it must be the
973     // predefines buffer or the module includes buffer. Any other file is not
974     // lexed with a normal lexer, so it won't be scanned for preprocessor
975     // directives.
976     //
977     // If we have the predefines buffer, resolve #include references (which come
978     // from the -include command line argument) from the current working
979     // directory instead of relative to the main file.
980     //
981     // If we have the module includes buffer, resolve #include references (which
982     // come from header declarations in the module map) relative to the module
983     // map file.
984     if (!FileEnt) {
985       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
986         auto IncludeDir =
987             HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
988                 Filename, getCurrentModule())
989                 ? HeaderInfo.getModuleMap().getBuiltinDir()
990                 : MainFileDir;
991         Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
992         BuildSystemModule = getCurrentModule()->IsSystem;
993       } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
994                       SourceMgr.getMainFileID()))) {
995         auto CWD = FileMgr.getOptionalDirectoryRef(".");
996         Includers.push_back(std::make_pair(*FileEnt, *CWD));
997       }
998     } else {
999       Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1000     }
1001 
1002     // MSVC searches the current include stack from top to bottom for
1003     // headers included by quoted include directives.
1004     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1005     if (LangOpts.MSVCCompat && !isAngled) {
1006       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1007         if (IsFileLexer(ISEntry))
1008           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1009             Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1010       }
1011     }
1012   }
1013 
1014   CurDir = CurDirLookup;
1015 
1016   if (FromFile) {
1017     // We're supposed to start looking from after a particular file. Search
1018     // the include path until we find that file or run out of files.
1019     ConstSearchDirIterator TmpCurDir = CurDir;
1020     ConstSearchDirIterator TmpFromDir = nullptr;
1021     while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1022                Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1023                Includers, SearchPath, RelativePath, RequestingModule,
1024                SuggestedModule, /*IsMapped=*/nullptr,
1025                /*IsFrameworkFound=*/nullptr, SkipCache)) {
1026       // Keep looking as if this file did a #include_next.
1027       TmpFromDir = TmpCurDir;
1028       ++TmpFromDir;
1029       if (&FE->getFileEntry() == FromFile) {
1030         // Found it.
1031         FromDir = TmpFromDir;
1032         CurDir = TmpCurDir;
1033         break;
1034       }
1035     }
1036   }
1037 
1038   // Do a standard file entry lookup.
1039   OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1040       Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1041       RelativePath, RequestingModule, SuggestedModule, IsMapped,
1042       IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1043   if (FE)
1044     return FE;
1045 
1046   OptionalFileEntryRef CurFileEnt;
1047   // Otherwise, see if this is a subframework header.  If so, this is relative
1048   // to one of the headers on the #include stack.  Walk the list of the current
1049   // headers on the #include stack and pass them to HeaderInfo.
1050   if (IsFileLexer()) {
1051     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1052       if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1053               Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1054               SuggestedModule)) {
1055         return FE;
1056       }
1057     }
1058   }
1059 
1060   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1061     if (IsFileLexer(ISEntry)) {
1062       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1063         if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1064                 Filename, *CurFileEnt, SearchPath, RelativePath,
1065                 RequestingModule, SuggestedModule)) {
1066           return FE;
1067         }
1068       }
1069     }
1070   }
1071 
1072   // Otherwise, we really couldn't find the file.
1073   return std::nullopt;
1074 }
1075 
1076 //===----------------------------------------------------------------------===//
1077 // Preprocessor Directive Handling.
1078 //===----------------------------------------------------------------------===//
1079 
1080 class Preprocessor::ResetMacroExpansionHelper {
1081 public:
ResetMacroExpansionHelper(Preprocessor * pp)1082   ResetMacroExpansionHelper(Preprocessor *pp)
1083     : PP(pp), save(pp->DisableMacroExpansion) {
1084     if (pp->MacroExpansionInDirectivesOverride)
1085       pp->DisableMacroExpansion = false;
1086   }
1087 
~ResetMacroExpansionHelper()1088   ~ResetMacroExpansionHelper() {
1089     PP->DisableMacroExpansion = save;
1090   }
1091 
1092 private:
1093   Preprocessor *PP;
1094   bool save;
1095 };
1096 
1097 /// Process a directive while looking for the through header or a #pragma
1098 /// hdrstop. The following directives are handled:
1099 /// #include (to check if it is the through header)
1100 /// #define (to warn about macros that don't match the PCH)
1101 /// #pragma (to check for pragma hdrstop).
1102 /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1103 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1104                                                        SourceLocation HashLoc) {
1105   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1106     if (II->getPPKeywordID() == tok::pp_define) {
1107       return HandleDefineDirective(Result,
1108                                    /*ImmediatelyAfterHeaderGuard=*/false);
1109     }
1110     if (SkippingUntilPCHThroughHeader &&
1111         II->getPPKeywordID() == tok::pp_include) {
1112       return HandleIncludeDirective(HashLoc, Result);
1113     }
1114     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1115       Lex(Result);
1116       auto *II = Result.getIdentifierInfo();
1117       if (II && II->getName() == "hdrstop")
1118         return HandlePragmaHdrstop(Result);
1119     }
1120   }
1121   DiscardUntilEndOfDirective();
1122 }
1123 
1124 /// HandleDirective - This callback is invoked when the lexer sees a # token
1125 /// at the start of a line.  This consumes the directive, modifies the
1126 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1127 /// read is the correct one.
HandleDirective(Token & Result)1128 void Preprocessor::HandleDirective(Token &Result) {
1129   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1130 
1131   // We just parsed a # character at the start of a line, so we're in directive
1132   // mode.  Tell the lexer this so any newlines we see will be converted into an
1133   // EOD token (which terminates the directive).
1134   CurPPLexer->ParsingPreprocessorDirective = true;
1135   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1136 
1137   bool ImmediatelyAfterTopLevelIfndef =
1138       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1139   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1140 
1141   ++NumDirectives;
1142 
1143   // We are about to read a token.  For the multiple-include optimization FA to
1144   // work, we have to remember if we had read any tokens *before* this
1145   // pp-directive.
1146   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1147 
1148   // Save the '#' token in case we need to return it later.
1149   Token SavedHash = Result;
1150 
1151   // Read the next token, the directive flavor.  This isn't expanded due to
1152   // C99 6.10.3p8.
1153   LexUnexpandedToken(Result);
1154 
1155   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1156   //   #define A(x) #x
1157   //   A(abc
1158   //     #warning blah
1159   //   def)
1160   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1161   // not support this for #include-like directives, since that can result in
1162   // terrible diagnostics, and does not work in GCC.
1163   if (InMacroArgs) {
1164     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1165       switch (II->getPPKeywordID()) {
1166       case tok::pp_include:
1167       case tok::pp_import:
1168       case tok::pp_include_next:
1169       case tok::pp___include_macros:
1170       case tok::pp_pragma:
1171         Diag(Result, diag::err_embedded_directive) << II->getName();
1172         Diag(*ArgMacro, diag::note_macro_expansion_here)
1173             << ArgMacro->getIdentifierInfo();
1174         DiscardUntilEndOfDirective();
1175         return;
1176       default:
1177         break;
1178       }
1179     }
1180     Diag(Result, diag::ext_embedded_directive);
1181   }
1182 
1183   // Temporarily enable macro expansion if set so
1184   // and reset to previous state when returning from this function.
1185   ResetMacroExpansionHelper helper(this);
1186 
1187   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1188     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1189 
1190   switch (Result.getKind()) {
1191   case tok::eod:
1192     // Ignore the null directive with regards to the multiple-include
1193     // optimization, i.e. allow the null directive to appear outside of the
1194     // include guard and still enable the multiple-include optimization.
1195     CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1196     return;   // null directive.
1197   case tok::code_completion:
1198     setCodeCompletionReached();
1199     if (CodeComplete)
1200       CodeComplete->CodeCompleteDirective(
1201                                     CurPPLexer->getConditionalStackDepth() > 0);
1202     return;
1203   case tok::numeric_constant:  // # 7  GNU line marker directive.
1204     // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1205     // directive. However do permit it in the predefines file, as we use line
1206     // markers to mark the builtin macros as being in a system header.
1207     if (getLangOpts().AsmPreprocessor &&
1208         SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1209       break;
1210     return HandleDigitDirective(Result);
1211   default:
1212     IdentifierInfo *II = Result.getIdentifierInfo();
1213     if (!II) break; // Not an identifier.
1214 
1215     // Ask what the preprocessor keyword ID is.
1216     switch (II->getPPKeywordID()) {
1217     default: break;
1218     // C99 6.10.1 - Conditional Inclusion.
1219     case tok::pp_if:
1220       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1221     case tok::pp_ifdef:
1222       return HandleIfdefDirective(Result, SavedHash, false,
1223                                   true /*not valid for miopt*/);
1224     case tok::pp_ifndef:
1225       return HandleIfdefDirective(Result, SavedHash, true,
1226                                   ReadAnyTokensBeforeDirective);
1227     case tok::pp_elif:
1228     case tok::pp_elifdef:
1229     case tok::pp_elifndef:
1230       return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1231 
1232     case tok::pp_else:
1233       return HandleElseDirective(Result, SavedHash);
1234     case tok::pp_endif:
1235       return HandleEndifDirective(Result);
1236 
1237     // C99 6.10.2 - Source File Inclusion.
1238     case tok::pp_include:
1239       // Handle #include.
1240       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1241     case tok::pp___include_macros:
1242       // Handle -imacros.
1243       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1244 
1245     // C99 6.10.3 - Macro Replacement.
1246     case tok::pp_define:
1247       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1248     case tok::pp_undef:
1249       return HandleUndefDirective();
1250 
1251     // C99 6.10.4 - Line Control.
1252     case tok::pp_line:
1253       return HandleLineDirective();
1254 
1255     // C99 6.10.5 - Error Directive.
1256     case tok::pp_error:
1257       return HandleUserDiagnosticDirective(Result, false);
1258 
1259     // C99 6.10.6 - Pragma Directive.
1260     case tok::pp_pragma:
1261       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1262 
1263     // GNU Extensions.
1264     case tok::pp_import:
1265       return HandleImportDirective(SavedHash.getLocation(), Result);
1266     case tok::pp_include_next:
1267       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1268 
1269     case tok::pp_warning:
1270       if (LangOpts.CPlusPlus)
1271         Diag(Result, LangOpts.CPlusPlus23
1272                          ? diag::warn_cxx23_compat_warning_directive
1273                          : diag::ext_pp_warning_directive)
1274             << /*C++23*/ 1;
1275       else
1276         Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1277                                   : diag::ext_pp_warning_directive)
1278             << /*C23*/ 0;
1279 
1280       return HandleUserDiagnosticDirective(Result, true);
1281     case tok::pp_ident:
1282       return HandleIdentSCCSDirective(Result);
1283     case tok::pp_sccs:
1284       return HandleIdentSCCSDirective(Result);
1285     case tok::pp_assert:
1286       //isExtension = true;  // FIXME: implement #assert
1287       break;
1288     case tok::pp_unassert:
1289       //isExtension = true;  // FIXME: implement #unassert
1290       break;
1291 
1292     case tok::pp___public_macro:
1293       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1294         return HandleMacroPublicDirective(Result);
1295       break;
1296 
1297     case tok::pp___private_macro:
1298       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1299         return HandleMacroPrivateDirective();
1300       break;
1301     }
1302     break;
1303   }
1304 
1305   // If this is a .S file, treat unknown # directives as non-preprocessor
1306   // directives.  This is important because # may be a comment or introduce
1307   // various pseudo-ops.  Just return the # token and push back the following
1308   // token to be lexed next time.
1309   if (getLangOpts().AsmPreprocessor) {
1310     auto Toks = std::make_unique<Token[]>(2);
1311     // Return the # and the token after it.
1312     Toks[0] = SavedHash;
1313     Toks[1] = Result;
1314 
1315     // If the second token is a hashhash token, then we need to translate it to
1316     // unknown so the token lexer doesn't try to perform token pasting.
1317     if (Result.is(tok::hashhash))
1318       Toks[1].setKind(tok::unknown);
1319 
1320     // Enter this token stream so that we re-lex the tokens.  Make sure to
1321     // enable macro expansion, in case the token after the # is an identifier
1322     // that is expanded.
1323     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1324     return;
1325   }
1326 
1327   // If we reached here, the preprocessing token is not valid!
1328   // Start suggesting if a similar directive found.
1329   Diag(Result, diag::err_pp_invalid_directive) << 0;
1330 
1331   // Read the rest of the PP line.
1332   DiscardUntilEndOfDirective();
1333 
1334   // Okay, we're done parsing the directive.
1335 }
1336 
1337 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1338 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1339 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1340                          unsigned DiagID, Preprocessor &PP,
1341                          bool IsGNULineDirective=false) {
1342   if (DigitTok.isNot(tok::numeric_constant)) {
1343     PP.Diag(DigitTok, DiagID);
1344 
1345     if (DigitTok.isNot(tok::eod))
1346       PP.DiscardUntilEndOfDirective();
1347     return true;
1348   }
1349 
1350   SmallString<64> IntegerBuffer;
1351   IntegerBuffer.resize(DigitTok.getLength());
1352   const char *DigitTokBegin = &IntegerBuffer[0];
1353   bool Invalid = false;
1354   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1355   if (Invalid)
1356     return true;
1357 
1358   // Verify that we have a simple digit-sequence, and compute the value.  This
1359   // is always a simple digit string computed in decimal, so we do this manually
1360   // here.
1361   Val = 0;
1362   for (unsigned i = 0; i != ActualLength; ++i) {
1363     // C++1y [lex.fcon]p1:
1364     //   Optional separating single quotes in a digit-sequence are ignored
1365     if (DigitTokBegin[i] == '\'')
1366       continue;
1367 
1368     if (!isDigit(DigitTokBegin[i])) {
1369       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1370               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1371       PP.DiscardUntilEndOfDirective();
1372       return true;
1373     }
1374 
1375     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1376     if (NextVal < Val) { // overflow.
1377       PP.Diag(DigitTok, DiagID);
1378       PP.DiscardUntilEndOfDirective();
1379       return true;
1380     }
1381     Val = NextVal;
1382   }
1383 
1384   if (DigitTokBegin[0] == '0' && Val)
1385     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1386       << IsGNULineDirective;
1387 
1388   return false;
1389 }
1390 
1391 /// Handle a \#line directive: C99 6.10.4.
1392 ///
1393 /// The two acceptable forms are:
1394 /// \verbatim
1395 ///   # line digit-sequence
1396 ///   # line digit-sequence "s-char-sequence"
1397 /// \endverbatim
HandleLineDirective()1398 void Preprocessor::HandleLineDirective() {
1399   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1400   // expanded.
1401   Token DigitTok;
1402   Lex(DigitTok);
1403 
1404   // Validate the number and convert it to an unsigned.
1405   unsigned LineNo;
1406   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1407     return;
1408 
1409   if (LineNo == 0)
1410     Diag(DigitTok, diag::ext_pp_line_zero);
1411 
1412   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1413   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1414   unsigned LineLimit = 32768U;
1415   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1416     LineLimit = 2147483648U;
1417   if (LineNo >= LineLimit)
1418     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1419   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1420     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1421 
1422   int FilenameID = -1;
1423   Token StrTok;
1424   Lex(StrTok);
1425 
1426   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1427   // string followed by eod.
1428   if (StrTok.is(tok::eod))
1429     ; // ok
1430   else if (StrTok.isNot(tok::string_literal)) {
1431     Diag(StrTok, diag::err_pp_line_invalid_filename);
1432     DiscardUntilEndOfDirective();
1433     return;
1434   } else if (StrTok.hasUDSuffix()) {
1435     Diag(StrTok, diag::err_invalid_string_udl);
1436     DiscardUntilEndOfDirective();
1437     return;
1438   } else {
1439     // Parse and validate the string, converting it into a unique ID.
1440     StringLiteralParser Literal(StrTok, *this);
1441     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1442     if (Literal.hadError) {
1443       DiscardUntilEndOfDirective();
1444       return;
1445     }
1446     if (Literal.Pascal) {
1447       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1448       DiscardUntilEndOfDirective();
1449       return;
1450     }
1451     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1452 
1453     // Verify that there is nothing after the string, other than EOD.  Because
1454     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1455     CheckEndOfDirective("line", true);
1456   }
1457 
1458   // Take the file kind of the file containing the #line directive. #line
1459   // directives are often used for generated sources from the same codebase, so
1460   // the new file should generally be classified the same way as the current
1461   // file. This is visible in GCC's pre-processed output, which rewrites #line
1462   // to GNU line markers.
1463   SrcMgr::CharacteristicKind FileKind =
1464       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1465 
1466   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1467                         false, FileKind);
1468 
1469   if (Callbacks)
1470     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1471                            PPCallbacks::RenameFile, FileKind);
1472 }
1473 
1474 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1475 /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1476 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1477                                 SrcMgr::CharacteristicKind &FileKind,
1478                                 Preprocessor &PP) {
1479   unsigned FlagVal;
1480   Token FlagTok;
1481   PP.Lex(FlagTok);
1482   if (FlagTok.is(tok::eod)) return false;
1483   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1484     return true;
1485 
1486   if (FlagVal == 1) {
1487     IsFileEntry = true;
1488 
1489     PP.Lex(FlagTok);
1490     if (FlagTok.is(tok::eod)) return false;
1491     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1492       return true;
1493   } else if (FlagVal == 2) {
1494     IsFileExit = true;
1495 
1496     SourceManager &SM = PP.getSourceManager();
1497     // If we are leaving the current presumed file, check to make sure the
1498     // presumed include stack isn't empty!
1499     FileID CurFileID =
1500       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1501     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1502     if (PLoc.isInvalid())
1503       return true;
1504 
1505     // If there is no include loc (main file) or if the include loc is in a
1506     // different physical file, then we aren't in a "1" line marker flag region.
1507     SourceLocation IncLoc = PLoc.getIncludeLoc();
1508     if (IncLoc.isInvalid() ||
1509         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1510       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1511       PP.DiscardUntilEndOfDirective();
1512       return true;
1513     }
1514 
1515     PP.Lex(FlagTok);
1516     if (FlagTok.is(tok::eod)) return false;
1517     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1518       return true;
1519   }
1520 
1521   // We must have 3 if there are still flags.
1522   if (FlagVal != 3) {
1523     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1524     PP.DiscardUntilEndOfDirective();
1525     return true;
1526   }
1527 
1528   FileKind = SrcMgr::C_System;
1529 
1530   PP.Lex(FlagTok);
1531   if (FlagTok.is(tok::eod)) return false;
1532   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1533     return true;
1534 
1535   // We must have 4 if there is yet another flag.
1536   if (FlagVal != 4) {
1537     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1538     PP.DiscardUntilEndOfDirective();
1539     return true;
1540   }
1541 
1542   FileKind = SrcMgr::C_ExternCSystem;
1543 
1544   PP.Lex(FlagTok);
1545   if (FlagTok.is(tok::eod)) return false;
1546 
1547   // There are no more valid flags here.
1548   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1549   PP.DiscardUntilEndOfDirective();
1550   return true;
1551 }
1552 
1553 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1554 /// one of the following forms:
1555 ///
1556 ///     # 42
1557 ///     # 42 "file" ('1' | '2')?
1558 ///     # 42 "file" ('1' | '2')? '3' '4'?
1559 ///
HandleDigitDirective(Token & DigitTok)1560 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1561   // Validate the number and convert it to an unsigned.  GNU does not have a
1562   // line # limit other than it fit in 32-bits.
1563   unsigned LineNo;
1564   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1565                    *this, true))
1566     return;
1567 
1568   Token StrTok;
1569   Lex(StrTok);
1570 
1571   bool IsFileEntry = false, IsFileExit = false;
1572   int FilenameID = -1;
1573   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1574 
1575   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1576   // string followed by eod.
1577   if (StrTok.is(tok::eod)) {
1578     Diag(StrTok, diag::ext_pp_gnu_line_directive);
1579     // Treat this like "#line NN", which doesn't change file characteristics.
1580     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1581   } else if (StrTok.isNot(tok::string_literal)) {
1582     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1583     DiscardUntilEndOfDirective();
1584     return;
1585   } else if (StrTok.hasUDSuffix()) {
1586     Diag(StrTok, diag::err_invalid_string_udl);
1587     DiscardUntilEndOfDirective();
1588     return;
1589   } else {
1590     // Parse and validate the string, converting it into a unique ID.
1591     StringLiteralParser Literal(StrTok, *this);
1592     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1593     if (Literal.hadError) {
1594       DiscardUntilEndOfDirective();
1595       return;
1596     }
1597     if (Literal.Pascal) {
1598       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1599       DiscardUntilEndOfDirective();
1600       return;
1601     }
1602 
1603     // If a filename was present, read any flags that are present.
1604     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1605       return;
1606     if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1607         !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1608       Diag(StrTok, diag::ext_pp_gnu_line_directive);
1609 
1610     // Exiting to an empty string means pop to the including file, so leave
1611     // FilenameID as -1 in that case.
1612     if (!(IsFileExit && Literal.GetString().empty()))
1613       FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1614   }
1615 
1616   // Create a line note with this information.
1617   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1618                         IsFileExit, FileKind);
1619 
1620   // If the preprocessor has callbacks installed, notify them of the #line
1621   // change.  This is used so that the line marker comes out in -E mode for
1622   // example.
1623   if (Callbacks) {
1624     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1625     if (IsFileEntry)
1626       Reason = PPCallbacks::EnterFile;
1627     else if (IsFileExit)
1628       Reason = PPCallbacks::ExitFile;
1629 
1630     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1631   }
1632 }
1633 
1634 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1635 ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1636 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1637                                                  bool isWarning) {
1638   // Read the rest of the line raw.  We do this because we don't want macros
1639   // to be expanded and we don't require that the tokens be valid preprocessing
1640   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1641   // collapse multiple consecutive white space between tokens, but this isn't
1642   // specified by the standard.
1643   SmallString<128> Message;
1644   CurLexer->ReadToEndOfLine(&Message);
1645 
1646   // Find the first non-whitespace character, so that we can make the
1647   // diagnostic more succinct.
1648   StringRef Msg = Message.str().ltrim(' ');
1649 
1650   if (isWarning)
1651     Diag(Tok, diag::pp_hash_warning) << Msg;
1652   else
1653     Diag(Tok, diag::err_pp_hash_error) << Msg;
1654 }
1655 
1656 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1657 ///
HandleIdentSCCSDirective(Token & Tok)1658 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1659   // Yes, this directive is an extension.
1660   Diag(Tok, diag::ext_pp_ident_directive);
1661 
1662   // Read the string argument.
1663   Token StrTok;
1664   Lex(StrTok);
1665 
1666   // If the token kind isn't a string, it's a malformed directive.
1667   if (StrTok.isNot(tok::string_literal) &&
1668       StrTok.isNot(tok::wide_string_literal)) {
1669     Diag(StrTok, diag::err_pp_malformed_ident);
1670     if (StrTok.isNot(tok::eod))
1671       DiscardUntilEndOfDirective();
1672     return;
1673   }
1674 
1675   if (StrTok.hasUDSuffix()) {
1676     Diag(StrTok, diag::err_invalid_string_udl);
1677     DiscardUntilEndOfDirective();
1678     return;
1679   }
1680 
1681   // Verify that there is nothing after the string, other than EOD.
1682   CheckEndOfDirective("ident");
1683 
1684   if (Callbacks) {
1685     bool Invalid = false;
1686     std::string Str = getSpelling(StrTok, &Invalid);
1687     if (!Invalid)
1688       Callbacks->Ident(Tok.getLocation(), Str);
1689   }
1690 }
1691 
1692 /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1693 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1694   Token MacroNameTok;
1695   ReadMacroName(MacroNameTok, MU_Undef);
1696 
1697   // Error reading macro name?  If so, diagnostic already issued.
1698   if (MacroNameTok.is(tok::eod))
1699     return;
1700 
1701   // Check to see if this is the last token on the #__public_macro line.
1702   CheckEndOfDirective("__public_macro");
1703 
1704   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1705   // Okay, we finally have a valid identifier to undef.
1706   MacroDirective *MD = getLocalMacroDirective(II);
1707 
1708   // If the macro is not defined, this is an error.
1709   if (!MD) {
1710     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1711     return;
1712   }
1713 
1714   // Note that this macro has now been exported.
1715   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1716                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1717 }
1718 
1719 /// Handle a #private directive.
HandleMacroPrivateDirective()1720 void Preprocessor::HandleMacroPrivateDirective() {
1721   Token MacroNameTok;
1722   ReadMacroName(MacroNameTok, MU_Undef);
1723 
1724   // Error reading macro name?  If so, diagnostic already issued.
1725   if (MacroNameTok.is(tok::eod))
1726     return;
1727 
1728   // Check to see if this is the last token on the #__private_macro line.
1729   CheckEndOfDirective("__private_macro");
1730 
1731   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1732   // Okay, we finally have a valid identifier to undef.
1733   MacroDirective *MD = getLocalMacroDirective(II);
1734 
1735   // If the macro is not defined, this is an error.
1736   if (!MD) {
1737     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1738     return;
1739   }
1740 
1741   // Note that this macro has now been marked private.
1742   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1743                                MacroNameTok.getLocation(), /*isPublic=*/false));
1744 }
1745 
1746 //===----------------------------------------------------------------------===//
1747 // Preprocessor Include Directive Handling.
1748 //===----------------------------------------------------------------------===//
1749 
1750 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1751 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1752 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1753 /// caller is expected to provide a buffer that is large enough to hold the
1754 /// spelling of the filename, but is also expected to handle the case when
1755 /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1756 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1757                                               StringRef &Buffer) {
1758   // Get the text form of the filename.
1759   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1760 
1761   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1762   // C++20 [lex.header]/2:
1763   //
1764   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1765   //   in C: behavior is undefined
1766   //   in C++: program is conditionally-supported with implementation-defined
1767   //           semantics
1768 
1769   // Make sure the filename is <x> or "x".
1770   bool isAngled;
1771   if (Buffer[0] == '<') {
1772     if (Buffer.back() != '>') {
1773       Diag(Loc, diag::err_pp_expects_filename);
1774       Buffer = StringRef();
1775       return true;
1776     }
1777     isAngled = true;
1778   } else if (Buffer[0] == '"') {
1779     if (Buffer.back() != '"') {
1780       Diag(Loc, diag::err_pp_expects_filename);
1781       Buffer = StringRef();
1782       return true;
1783     }
1784     isAngled = false;
1785   } else {
1786     Diag(Loc, diag::err_pp_expects_filename);
1787     Buffer = StringRef();
1788     return true;
1789   }
1790 
1791   // Diagnose #include "" as invalid.
1792   if (Buffer.size() <= 2) {
1793     Diag(Loc, diag::err_pp_empty_filename);
1794     Buffer = StringRef();
1795     return true;
1796   }
1797 
1798   // Skip the brackets.
1799   Buffer = Buffer.substr(1, Buffer.size()-2);
1800   return isAngled;
1801 }
1802 
1803 /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1804 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1805                                         tok::TokenKind Kind,
1806                                         void *AnnotationVal) {
1807   // FIXME: Produce this as the current token directly, rather than
1808   // allocating a new token for it.
1809   auto Tok = std::make_unique<Token[]>(1);
1810   Tok[0].startToken();
1811   Tok[0].setKind(Kind);
1812   Tok[0].setLocation(Range.getBegin());
1813   Tok[0].setAnnotationEndLoc(Range.getEnd());
1814   Tok[0].setAnnotationValue(AnnotationVal);
1815   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1816 }
1817 
1818 /// Produce a diagnostic informing the user that a #include or similar
1819 /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<std::pair<IdentifierInfo *,SourceLocation>> Path,SourceLocation PathEnd)1820 static void diagnoseAutoModuleImport(
1821     Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1822     ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1823     SourceLocation PathEnd) {
1824   SmallString<128> PathString;
1825   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1826     if (I)
1827       PathString += '.';
1828     PathString += Path[I].first->getName();
1829   }
1830 
1831   int IncludeKind = 0;
1832   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1833   case tok::pp_include:
1834     IncludeKind = 0;
1835     break;
1836 
1837   case tok::pp_import:
1838     IncludeKind = 1;
1839     break;
1840 
1841   case tok::pp_include_next:
1842     IncludeKind = 2;
1843     break;
1844 
1845   case tok::pp___include_macros:
1846     IncludeKind = 3;
1847     break;
1848 
1849   default:
1850     llvm_unreachable("unknown include directive kind");
1851   }
1852 
1853   PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1854       << IncludeKind << PathString;
1855 }
1856 
1857 // Given a vector of path components and a string containing the real
1858 // path to the file, build a properly-cased replacement in the vector,
1859 // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName,llvm::sys::path::Style Separator)1860 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1861                             StringRef RealPathName,
1862                             llvm::sys::path::Style Separator) {
1863   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1864   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1865   int Cnt = 0;
1866   bool SuggestReplacement = false;
1867 
1868   auto IsSep = [Separator](StringRef Component) {
1869     return Component.size() == 1 &&
1870            llvm::sys::path::is_separator(Component[0], Separator);
1871   };
1872 
1873   // Below is a best-effort to handle ".." in paths. It is admittedly
1874   // not 100% correct in the presence of symlinks.
1875   for (auto &Component : llvm::reverse(Components)) {
1876     if ("." == Component) {
1877     } else if (".." == Component) {
1878       ++Cnt;
1879     } else if (Cnt) {
1880       --Cnt;
1881     } else if (RealPathComponentIter != RealPathComponentEnd) {
1882       if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1883           Component != *RealPathComponentIter) {
1884         // If these non-separator path components differ by more than just case,
1885         // then we may be looking at symlinked paths. Bail on this diagnostic to
1886         // avoid noisy false positives.
1887         SuggestReplacement =
1888             RealPathComponentIter->equals_insensitive(Component);
1889         if (!SuggestReplacement)
1890           break;
1891         Component = *RealPathComponentIter;
1892       }
1893       ++RealPathComponentIter;
1894     }
1895   }
1896   return SuggestReplacement;
1897 }
1898 
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,const Module & M,DiagnosticsEngine & Diags)1899 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1900                                           const TargetInfo &TargetInfo,
1901                                           const Module &M,
1902                                           DiagnosticsEngine &Diags) {
1903   Module::Requirement Requirement;
1904   Module::UnresolvedHeaderDirective MissingHeader;
1905   Module *ShadowingModule = nullptr;
1906   if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1907                     ShadowingModule))
1908     return false;
1909 
1910   if (MissingHeader.FileNameLoc.isValid()) {
1911     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1912         << MissingHeader.IsUmbrella << MissingHeader.FileName;
1913   } else if (ShadowingModule) {
1914     Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
1915     Diags.Report(ShadowingModule->DefinitionLoc,
1916                  diag::note_previous_definition);
1917   } else {
1918     // FIXME: Track the location at which the requirement was specified, and
1919     // use it here.
1920     Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
1921         << M.getFullModuleName() << Requirement.second << Requirement.first;
1922   }
1923   return true;
1924 }
1925 
1926 std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const1927 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
1928   // #include_next is like #include, except that we start searching after
1929   // the current found directory.  If we can't do this, issue a
1930   // diagnostic.
1931   ConstSearchDirIterator Lookup = CurDirLookup;
1932   const FileEntry *LookupFromFile = nullptr;
1933 
1934   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
1935     // If the main file is a header, then it's either for PCH/AST generation,
1936     // or libclang opened it. Either way, handle it as a normal include below
1937     // and do not complain about include_next.
1938   } else if (isInPrimaryFile()) {
1939     Lookup = nullptr;
1940     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
1941   } else if (CurLexerSubmodule) {
1942     // Start looking up in the directory *after* the one in which the current
1943     // file would be found, if any.
1944     assert(CurPPLexer && "#include_next directive in macro?");
1945     if (auto FE = CurPPLexer->getFileEntry())
1946       LookupFromFile = *FE;
1947     Lookup = nullptr;
1948   } else if (!Lookup) {
1949     // The current file was not found by walking the include path. Either it
1950     // is the primary file (handled above), or it was found by absolute path,
1951     // or it was found relative to such a file.
1952     // FIXME: Track enough information so we know which case we're in.
1953     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
1954   } else {
1955     // Start looking up in the next directory.
1956     ++Lookup;
1957   }
1958 
1959   return {Lookup, LookupFromFile};
1960 }
1961 
1962 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
1963 /// the file to be included from the lexer, then include it!  This is a common
1964 /// routine with functionality shared between \#include, \#include_next and
1965 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
1966 /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)1967 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
1968                                           Token &IncludeTok,
1969                                           ConstSearchDirIterator LookupFrom,
1970                                           const FileEntry *LookupFromFile) {
1971   Token FilenameTok;
1972   if (LexHeaderName(FilenameTok))
1973     return;
1974 
1975   if (FilenameTok.isNot(tok::header_name)) {
1976     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
1977     if (FilenameTok.isNot(tok::eod))
1978       DiscardUntilEndOfDirective();
1979     return;
1980   }
1981 
1982   // Verify that there is nothing after the filename, other than EOD.  Note
1983   // that we allow macros that expand to nothing after the filename, because
1984   // this falls into the category of "#include pp-tokens new-line" specified
1985   // in C99 6.10.2p4.
1986   SourceLocation EndLoc =
1987       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
1988 
1989   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
1990                                             EndLoc, LookupFrom, LookupFromFile);
1991   switch (Action.Kind) {
1992   case ImportAction::None:
1993   case ImportAction::SkippedModuleImport:
1994     break;
1995   case ImportAction::ModuleBegin:
1996     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1997                          tok::annot_module_begin, Action.ModuleForHeader);
1998     break;
1999   case ImportAction::HeaderUnitImport:
2000     EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2001                          Action.ModuleForHeader);
2002     break;
2003   case ImportAction::ModuleImport:
2004     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2005                          tok::annot_module_include, Action.ModuleForHeader);
2006     break;
2007   case ImportAction::Failure:
2008     assert(TheModuleLoader.HadFatalFailure &&
2009            "This should be an early exit only to a fatal error");
2010     TheModuleLoader.HadFatalFailure = true;
2011     IncludeTok.setKind(tok::eof);
2012     CurLexer->cutOffLexing();
2013     return;
2014   }
2015 }
2016 
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)2017 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2018     ConstSearchDirIterator *CurDir, StringRef &Filename,
2019     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2020     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2021     bool &IsMapped, ConstSearchDirIterator LookupFrom,
2022     const FileEntry *LookupFromFile, StringRef &LookupFilename,
2023     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2024     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2025   auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2026     if (LangOpts.AsmPreprocessor)
2027       return;
2028 
2029     Module *RequestingModule = getModuleForLocation(
2030         FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2031     bool RequestingModuleIsModuleInterface =
2032         !SourceMgr.isInMainFile(FilenameLoc);
2033 
2034     HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2035         RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2036         Filename, FE);
2037   };
2038 
2039   OptionalFileEntryRef File = LookupFile(
2040       FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2041       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2042       &SuggestedModule, &IsMapped, &IsFrameworkFound);
2043   if (File) {
2044     DiagnoseHeaderInclusion(*File);
2045     return File;
2046   }
2047 
2048   // Give the clients a chance to silently skip this include.
2049   if (Callbacks && Callbacks->FileNotFound(Filename))
2050     return std::nullopt;
2051 
2052   if (SuppressIncludeNotFoundError)
2053     return std::nullopt;
2054 
2055   // If the file could not be located and it was included via angle
2056   // brackets, we can attempt a lookup as though it were a quoted path to
2057   // provide the user with a possible fixit.
2058   if (isAngled) {
2059     OptionalFileEntryRef File = LookupFile(
2060         FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2061         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2062         &SuggestedModule, &IsMapped,
2063         /*IsFrameworkFound=*/nullptr);
2064     if (File) {
2065       DiagnoseHeaderInclusion(*File);
2066       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2067           << Filename << IsImportDecl
2068           << FixItHint::CreateReplacement(FilenameRange,
2069                                           "\"" + Filename.str() + "\"");
2070       return File;
2071     }
2072   }
2073 
2074   // Check for likely typos due to leading or trailing non-isAlphanumeric
2075   // characters
2076   StringRef OriginalFilename = Filename;
2077   if (LangOpts.SpellChecking) {
2078     // A heuristic to correct a typo file name by removing leading and
2079     // trailing non-isAlphanumeric characters.
2080     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2081       Filename = Filename.drop_until(isAlphanumeric);
2082       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2083         Filename = Filename.drop_back();
2084       }
2085       return Filename;
2086     };
2087     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2088     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2089 
2090     OptionalFileEntryRef File = LookupFile(
2091         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2092         LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2093         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2094         /*IsFrameworkFound=*/nullptr);
2095     if (File) {
2096       DiagnoseHeaderInclusion(*File);
2097       auto Hint =
2098           isAngled ? FixItHint::CreateReplacement(
2099                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
2100                    : FixItHint::CreateReplacement(
2101                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2102       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2103           << OriginalFilename << TypoCorrectionName << Hint;
2104       // We found the file, so set the Filename to the name after typo
2105       // correction.
2106       Filename = TypoCorrectionName;
2107       LookupFilename = TypoCorrectionLookupName;
2108       return File;
2109     }
2110   }
2111 
2112   // If the file is still not found, just go with the vanilla diagnostic
2113   assert(!File && "expected missing file");
2114   Diag(FilenameTok, diag::err_pp_file_not_found)
2115       << OriginalFilename << FilenameRange;
2116   if (IsFrameworkFound) {
2117     size_t SlashPos = OriginalFilename.find('/');
2118     assert(SlashPos != StringRef::npos &&
2119            "Include with framework name should have '/' in the filename");
2120     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2121     FrameworkCacheEntry &CacheEntry =
2122         HeaderInfo.LookupFrameworkCache(FrameworkName);
2123     assert(CacheEntry.Directory && "Found framework should be in cache");
2124     Diag(FilenameTok, diag::note_pp_framework_without_header)
2125         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2126         << CacheEntry.Directory->getName();
2127   }
2128 
2129   return std::nullopt;
2130 }
2131 
/// Handle either a #include-like directive or an import declaration that names
/// a header file.
///
/// The header is located with LookupHeaderIncludeOrImport. Based on the module
/// suggested for the header (if any) and on whether the file still needs to be
/// entered, the directive is then translated into a module import, skipped, or
/// handled by entering the file textually.
///
/// \param HashLoc The location of the '#' token for an include, or
///        SourceLocation() for an import declaration.
/// \param IncludeTok The include / include_next / import token.
/// \param FilenameTok The header-name token.
/// \param EndLoc The location at which any imported macros become visible.
/// \param LookupFrom For #include_next, the starting directory for the
///        directory lookup.
/// \param LookupFromFile For #include_next, the starting file for the directory
///        lookup.
///
/// \returns An ImportAction describing what happened:
///          - ModuleImport: the directive was turned into an import of a
///            module, which has been made visible at \p EndLoc.
///          - SkippedModuleImport: the file did not need to be entered but a
///            module is associated with it.
///          - ModuleBegin: the file was entered and a submodule was entered
///            for it.
///          - Failure: no FileID could be created for the file.
///          - None: in every other case (including all diagnosed errors and
///            a plain textual inclusion).
Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
    SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
    SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile) {
  SmallString<128> FilenameBuffer;
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
  SourceLocation CharEnd = FilenameTok.getEndLoc();

  CharSourceRange FilenameRange
    = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
  // Keep the spelling as written in the source; it is used below for the
  // include-alias mapping and in diagnostics.
  StringRef OriginalFilename = Filename;
  bool isAngled =
    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);

  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
  // error.
  if (Filename.empty())
    return {ImportAction::None};

  // An invalid HashLoc identifies an import declaration; diagnostics for those
  // are attached to the import token because there is no '#'.
  bool IsImportDecl = HashLoc.isInvalid();
  SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;

  // Complain about attempts to #include files in an audit pragma.
  if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
    Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
  }

  // Complain about attempts to #include files in an assume-nonnull pragma.
  if (PragmaAssumeNonNullLoc.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
    Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaAssumeNonNullLoc = SourceLocation();
  }

  if (HeaderInfo.HasIncludeAliasMap()) {
    // Map the filename with the brackets still attached.  If the name doesn't
    // map to anything, fall back on the filename we've already gotten the
    // spelling for.
    StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
    if (!NewName.empty())
      Filename = NewName;
  }

  // Search include directories.
  bool IsMapped = false;
  bool IsFrameworkFound = false;
  ConstSearchDirIterator CurDir = nullptr;
  // SearchPath and RelativePath are handed to the lookup below and are later
  // forwarded to the InclusionDirective callback.
  SmallString<1024> SearchPath;
  SmallString<1024> RelativePath;
  // The module (if any) that the lookup associates with the header.
  ModuleMap::KnownHeader SuggestedModule;
  SourceLocation FilenameLoc = FilenameTok.getLocation();
  StringRef LookupFilename = Filename;

  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
  // is unnecessary on Windows since the filesystem there handles backslashes.
  SmallString<128> NormalizedPath;
  llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
  if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
    NormalizedPath = Filename.str();
    llvm::sys::path::native(NormalizedPath);
    LookupFilename = NormalizedPath;
    // From here on, separators in the user's spelling are classified using
    // Windows rules (see the IsSep lambda further down).
    BackslashStyle = llvm::sys::path::Style::windows;
  }

  OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
      &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
      IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);

  // While skipping ahead to the PCH through header, no include is processed;
  // we only watch for the through header itself to stop skipping.
  if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
    if (File && isPCHThroughHeader(&File->getFileEntry()))
      SkippingUntilPCHThroughHeader = false;
    return {ImportAction::None};
  }

  // Should we enter the source file? Set to Skip if either the source file is
  // known to have no effect beyond its effect on module visibility -- that is,
  // if it's got an include guard that is already defined, set to Import if it
  // is a modular header we've already built and should import.

  // For C++20 Modules
  // [cpp.include]/7 If the header identified by the header-name denotes an
  // importable header, it is implementation-defined whether the #include
  // preprocessing directive is instead replaced by an import directive.
  // For this implementation, the translation is permitted when we are parsing
  // the Global Module Fragment, and not otherwise (the cases where it would be
  // valid to replace an include with an import are highly constrained once in
  // named module purview; this choice avoids considerable complexity in
  // determining valid cases).

  // What to do with the header once all checks are done:
  //   Enter               - enter the file textually (the default).
  //   Import              - import the module that provides the header.
  //   Skip                - the file does not need to be entered.
  //   IncludeLimitReached - do not enter any further files.
  enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;

  // In single-file-parse mode included files are never entered.
  if (PPOpts->SingleFileParseMode)
    Action = IncludeLimitReached;

  // If we've reached the max allowed include depth, it is usually due to an
  // include cycle. Don't enter already processed files again as it can lead to
  // reaching the max allowed include depth again.
  if (Action == Enter && HasReachedMaxIncludeDepth && File &&
      alreadyIncluded(*File))
    Action = IncludeLimitReached;

  // FIXME: We do not have a good way to disambiguate C++ clang modules from
  // C++ standard modules (other than use/non-use of Header Units).
  Module *SM = SuggestedModule.getModule();

  // Note that this is computed before SM may be reset just below, so it
  // reflects the module originally suggested by the lookup.
  bool MaybeTranslateInclude =
      Action == Enter && File && SM && !SM->isForBuilding(getLangOpts());

  // Maybe a usable Header Unit
  bool UsableHeaderUnit = false;
  if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
    if (TrackGMFState.inGMF() || IsImportDecl)
      UsableHeaderUnit = true;
    else if (!IsImportDecl) {
      // IsImportDecl is necessarily false on this path.
      // This is a Header Unit that we do not include-translate
      SuggestedModule = ModuleMap::KnownHeader();
      SM = nullptr;
    }
  }
  // Maybe a usable clang header module.
  bool UsableClangHeaderModule =
      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
      !SM->isHeaderUnit();

  // Determine whether we should try to import the module for this #include, if
  // there is one. Don't do so if precompiled module support is disabled or we
  // are processing this module textually (because we're building the module).
  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
    // If this include corresponds to a module but that module is
    // unavailable, diagnose the situation and bail out.
    // FIXME: Remove this; loadModule does the same check (but produces
    // slightly worse diagnostics).
    if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(),
                               *SuggestedModule.getModule(),
                               getDiagnostics())) {
      Diag(FilenameTok.getLocation(),
           diag::note_implicit_top_level_module_import_here)
          << SuggestedModule.getModule()->getTopLevelModuleName();
      return {ImportAction::None};
    }

    // Compute the module access path corresponding to this module.
    // FIXME: Should we have a second loadModule() overload to avoid this
    // extra lookup step?
    SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
    // Walk from the module up through its parents, then reverse so the path
    // runs from the outermost module down to SM.
    for (Module *Mod = SM; Mod; Mod = Mod->Parent)
      Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                    FilenameTok.getLocation()));
    std::reverse(Path.begin(), Path.end());

    // Warn that we're replacing the include/import with a module import.
    if (!IsImportDecl)
      diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);

    // Load the module to import its macros. We'll make the declarations
    // visible when the parser gets here.
    // FIXME: Pass SuggestedModule in here rather than converting it to a path
    // and making the module loader convert it back again.
    ModuleLoadResult Imported = TheModuleLoader.loadModule(
        IncludeTok.getLocation(), Path, Module::Hidden,
        /*IsInclusionDirective=*/true);
    assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
           "the imported module is different than the suggested one");

    if (Imported) {
      Action = Import;
    } else if (Imported.isMissingExpected()) {
      // NOTE(review): the result is dereferenced here although its boolean
      // conversion above was false; presumably a "missing expected" result
      // still carries a module pointer -- confirm against ModuleLoadResult.
      markClangModuleAsAffecting(
          static_cast<Module *>(Imported)->getTopLevelModule());
      // We failed to find a submodule that we assumed would exist (because it
      // was in the directory of an umbrella header, for instance), but no
      // actual module containing it exists (because the umbrella header is
      // incomplete).  Treat this as a textual inclusion.
      SuggestedModule = ModuleMap::KnownHeader();
      SM = nullptr;
    } else if (Imported.isConfigMismatch()) {
      // On a configuration mismatch, enter the header textually. We still know
      // that it's part of the corresponding module.
    } else {
      // We hit an error processing the import. Bail out.
      if (hadModuleLoaderFatalFailure()) {
        // With a fatal failure in the module loader, we abort parsing.
        // IncludeTok is overwritten with an EOF token positioned at the end of
        // the current buffer, and the lexer is told to stop.
        Token &Result = IncludeTok;
        assert(CurLexer && "#include but no current lexer set!");
        Result.startToken();
        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
        CurLexer->cutOffLexing();
      }
      return {ImportAction::None};
    }
  }

  // The #included file will be considered to be a system header if either it is
  // in a system include directory, or if the #includer is a system include
  // header.
  SrcMgr::CharacteristicKind FileCharacter =
      SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
  if (File)
    FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);

  // If this is a '#import' or an import-declaration, don't re-enter the file.
  //
  // FIXME: If we have a suggested module for a '#include', and we've already
  // visited this file, don't bother entering it again. We know it has no
  // further effect.
  bool EnterOnce =
      IsImportDecl ||
      IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;

  // Passed to ShouldEnterIncludeFile below and later forwarded to
  // EnterSourceFile.
  bool IsFirstIncludeOfFile = false;

  // Ask HeaderInfo if we should enter this #include file.  If not, #including
  // this file will have no effect.
  if (Action == Enter && File &&
      !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
                                         getLangOpts().Modules, SM,
                                         IsFirstIncludeOfFile)) {
    // C++ standard modules:
    // If we are not in the GMF, then we textually include only
    // clang modules:
    // Even if we've already preprocessed this header once and know that we
    // don't need to see its contents again, we still need to import it if it's
    // modular because we might not have imported it from this submodule before.
    //
    // FIXME: We don't do this when compiling a PCH because the AST
    // serialization layer can't cope with it. This means we get local
    // submodule visibility semantics wrong in that case.
    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
      Action = TrackGMFState.inGMF() ? Import : Skip;
    else
      Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
  }

  // Check for circular inclusion of the main file.
  // We can't generate a consistent preamble with regard to the conditional
  // stack if the main file is included again as due to the preamble bounds
  // some directives (e.g. #endif of a header guard) will never be seen.
  // Since this will lead to confusing errors, avoid the inclusion.
  if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
      SourceMgr.isMainFile(File->getFileEntry())) {
    Diag(FilenameTok.getLocation(),
         diag::err_pp_including_mainfile_in_preamble);
    return {ImportAction::None};
  }

  if (Callbacks && !IsImportDecl) {
    // Notify the callback object that we've seen an inclusion directive.
    // The callback is invoked even when no file was found (File is empty in
    // that case); the module argument is only non-null for an actual import.
    // FIXME: Use a different callback for a pp-import?
    Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
                                  FilenameRange, File, SearchPath, RelativePath,
                                  Action == Import ? SuggestedModule.getModule()
                                                   : nullptr,
                                  FileCharacter);
    if (Action == Skip && File)
      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
  }

  // Everything below requires a file.
  if (!File)
    return {ImportAction::None};

  // If this is a C++20 pp-import declaration, diagnose if we didn't find any
  // module corresponding to the named header.
  if (IsImportDecl && !SuggestedModule) {
    Diag(FilenameTok, diag::err_header_import_not_header_unit)
      << OriginalFilename << File->getName();
    return {ImportAction::None};
  }

  // Issue a diagnostic if the name of the file on disk has a different case
  // than the one we're about to open.
  const bool CheckIncludePathPortability =
      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();

  if (CheckIncludePathPortability) {
    // Name is the (possibly slash-normalized) name used for the lookup;
    // NameWithoriginalSlashes keeps the separators the user actually wrote so
    // that the suggested replacement can reproduce them.
    StringRef Name = LookupFilename;
    StringRef NameWithoriginalSlashes = Filename;
#if defined(_WIN32)
    // Skip UNC prefix if present. (tryGetRealPathName() always
    // returns a path with the prefix skipped.)
    bool NameWasUNC = Name.consume_front("\\\\?\\");
    NameWithoriginalSlashes.consume_front("\\\\?\\");
#endif
    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
                                          llvm::sys::path::end(Name));
#if defined(_WIN32)
    // -Wnonportable-include-path is designed to diagnose includes using
    // case even on systems with a case-insensitive file system.
    // On Windows, RealPathName always starts with an upper-case drive
    // letter for absolute paths, but Name might start with either
    // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
    // ("foo" will always have on-disk case, no matter which case was
    // used in the cd command). To not emit this warning solely for
    // the drive letter, whose case is dependent on if `cd` is used
    // with upper- or lower-case drive letters, always consider the
    // given drive letter case as correct for the purpose of this warning.
    SmallString<128> FixedDriveRealPath;
    if (llvm::sys::path::is_absolute(Name) &&
        llvm::sys::path::is_absolute(RealPathName) &&
        toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
        isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
      assert(Components.size() >= 3 && "should have drive, backslash, name");
      assert(Components[0].size() == 2 && "should start with drive");
      assert(Components[0][1] == ':' && "should have colon");
      FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
      RealPathName = FixedDriveRealPath;
    }
#endif

    // When trySimplifyPath returns true, rebuild the header name from
    // Components (quoted the same way as the original) and offer it as a
    // fix-it.
    if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
      SmallString<128> Path;
      Path.reserve(Name.size()+2);
      Path.push_back(isAngled ? '<' : '"');

      const auto IsSep = [BackslashStyle](char c) {
        return llvm::sys::path::is_separator(c, BackslashStyle);
      };

      for (auto Component : Components) {
        // On POSIX, Components will contain a single '/' as first element
        // exactly if Name is an absolute path.
        // On Windows, it will contain "C:" followed by '\' for absolute paths.
        // The drive letter is optional for absolute paths on Windows, but
        // clang currently cannot process absolute paths in #include lines that
        // don't have a drive.
        // If the first entry in Components is a directory separator,
        // then the code at the bottom of this loop that keeps the original
        // directory separator style copies it. If the second entry is
        // a directory separator (the C:\ case), then that separator already
        // got copied when the C: was processed and we want to skip that entry.
        if (!(Component.size() == 1 && IsSep(Component[0])))
          Path.append(Component);
        else if (Path.size() != 1)
          continue;

        // Path carries one extra leading character (the opening quote), so
        // index Path.size()-1 into the user's spelling is the position right
        // after what has been emitted so far.
        // Append the separator(s) the user used, or the close quote
        if (Path.size() > NameWithoriginalSlashes.size()) {
          Path.push_back(isAngled ? '>' : '"');
          continue;
        }
        assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
        do
          Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
        while (Path.size() <= NameWithoriginalSlashes.size() &&
               IsSep(NameWithoriginalSlashes[Path.size()-1]));
      }

#if defined(_WIN32)
      // Restore UNC prefix if it was there.
      if (NameWasUNC)
        Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
#endif

      // For user files and known standard headers, issue a diagnostic.
      // For other system headers, don't. They can be controlled separately.
      auto DiagId =
          (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
              ? diag::pp_nonportable_path
              : diag::pp_nonportable_system_path;
      Diag(FilenameTok, DiagId) << Path <<
        FixItHint::CreateReplacement(FilenameRange, Path);
    }
  }

  // Act on the decision made above; only Enter falls through to the code that
  // actually enters the file.
  switch (Action) {
  case Skip:
    // If we don't need to enter the file, stop now.
    if (SM)
      return {ImportAction::SkippedModuleImport, SM};
    return {ImportAction::None};

  case IncludeLimitReached:
    // If we reached our include limit and don't want to enter any more files,
    // don't go any further.
    return {ImportAction::None};

  case Import: {
    // If this is a module import, make it visible if needed.
    assert(SM && "no module to import");

    makeModuleVisible(SM, EndLoc);

    // For #__include_macros the module is made visible above, but no import
    // is reported to the caller.
    if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
        tok::pp___include_macros)
      return {ImportAction::None};

    return {ImportAction::ModuleImport, SM};
  }

  case Enter:
    break;
  }

  // Check that we don't have infinite #include recursion.
  if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
    Diag(FilenameTok, diag::err_pp_include_too_deep);
    // Remembered so that later includes of already-seen files are not entered
    // again (see the HasReachedMaxIncludeDepth check above).
    HasReachedMaxIncludeDepth = true;
    return {ImportAction::None};
  }

  if (isAngled && isInNamedModule())
    Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
        << getNamedModuleName();

  // Look up the file, create a File ID for it.
  SourceLocation IncludePos = FilenameTok.getLocation();
  // If the filename string was the result of macro expansions, set the include
  // position on the file where it will be included and after the expansions.
  if (IncludePos.isMacroID())
    IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
  FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
  if (!FID.isValid()) {
    // Record the failure on the module loader and report it to the caller.
    TheModuleLoader.HadFatalFailure = true;
    return ImportAction::Failure;
  }

  // If all is good, enter the new file!
  // A true result from EnterSourceFile ends processing of this directive.
  if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
                      IsFirstIncludeOfFile))
    return {ImportAction::None};

  // Determine if we're switching to building a new submodule, and which one.
  // This does not apply for C++20 modules header units.
  if (SM && !SM->isHeaderUnit()) {
    if (SM->getTopLevelModule()->ShadowingModule) {
      // We are building a submodule that belongs to a shadowed module. This
      // means we find header files in the shadowed module.
      Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
          << SM->getFullModuleName();
      Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
           diag::note_previous_definition);
      return {ImportAction::None};
    }
    // When building a pch, -fmodule-name tells the compiler to textually
    // include headers in the specified module. We are not building the
    // specified module.
    //
    // FIXME: This is the wrong way to handle this. We should produce a PCH
    // that behaves the same as the header would behave in a compilation using
    // that PCH, which means we should enter the submodule. We need to teach
    // the AST serialization layer to deal with the resulting AST.
    if (getLangOpts().CompilingPCH && SM->isForBuilding(getLangOpts()))
      return {ImportAction::None};

    assert(!CurLexerSubmodule && "should not have marked this as a module yet");
    CurLexerSubmodule = SM;

    // Let the macro handling code know that any future macros are within
    // the new submodule.
    EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);

    // Let the parser know that any future declarations are within the new
    // submodule.
    // FIXME: There's no point doing this if we're handling a #__include_macros
    // directive.
    return {ImportAction::ModuleBegin, SM};
  }

  // Reaching this point means the file was entered textually with no submodule
  // to begin; an import declaration without a module was diagnosed earlier.
  assert(!IsImportDecl && "failed to diagnose missing module for import decl");
  return {ImportAction::None};
}
2614 
2615 /// HandleIncludeNextDirective - Implements \#include_next.
2616 ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2617 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2618                                               Token &IncludeNextTok) {
2619   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2620 
2621   ConstSearchDirIterator Lookup = nullptr;
2622   const FileEntry *LookupFromFile;
2623   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2624 
2625   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2626                                 LookupFromFile);
2627 }
2628 
2629 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2630 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2631   // The Microsoft #import directive takes a type library and generates header
2632   // files from it, and includes those.  This is beyond the scope of what clang
2633   // does, so we ignore it and error out.  However, #import can optionally have
2634   // trailing attributes that span multiple lines.  We're going to eat those
2635   // so we can continue processing from there.
2636   Diag(Tok, diag::err_pp_import_directive_ms );
2637 
2638   // Read tokens until we get to the end of the directive.  Note that the
2639   // directive can be split over multiple lines using the backslash character.
2640   DiscardUntilEndOfDirective();
2641 }
2642 
2643 /// HandleImportDirective - Implements \#import.
2644 ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2645 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2646                                          Token &ImportTok) {
2647   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2648     if (LangOpts.MSVCCompat)
2649       return HandleMicrosoftImportDirective(ImportTok);
2650     Diag(ImportTok, diag::ext_pp_import_directive);
2651   }
2652   return HandleIncludeDirective(HashLoc, ImportTok);
2653 }
2654 
2655 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2656 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2657 /// tokens through the preprocessor and discarding them (only keeping the side
2658 /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2659 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2660                                                 Token &IncludeMacrosTok) {
2661   // This directive should only occur in the predefines buffer.  If not, emit an
2662   // error and reject it.
2663   SourceLocation Loc = IncludeMacrosTok.getLocation();
2664   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2665     Diag(IncludeMacrosTok.getLocation(),
2666          diag::pp_include_macros_out_of_predefines);
2667     DiscardUntilEndOfDirective();
2668     return;
2669   }
2670 
2671   // Treat this as a normal #include for checking purposes.  If this is
2672   // successful, it will push a new lexer onto the include stack.
2673   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2674 
2675   Token TmpTok;
2676   do {
2677     Lex(TmpTok);
2678     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2679   } while (TmpTok.isNot(tok::hashhash));
2680 }
2681 
2682 //===----------------------------------------------------------------------===//
2683 // Preprocessor Macro Directive Handling.
2684 //===----------------------------------------------------------------------===//
2685 
2686 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2687 /// definition has just been read.  Lex the rest of the parameters and the
2688 /// closing ), updating MI with what we learn.  Return true if an error occurs
2689 /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2690 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2691   SmallVector<IdentifierInfo*, 32> Parameters;
2692 
2693   while (true) {
2694     LexUnexpandedNonComment(Tok);
2695     switch (Tok.getKind()) {
2696     case tok::r_paren:
2697       // Found the end of the parameter list.
2698       if (Parameters.empty())  // #define FOO()
2699         return false;
2700       // Otherwise we have #define FOO(A,)
2701       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2702       return true;
2703     case tok::ellipsis:  // #define X(... -> C99 varargs
2704       if (!LangOpts.C99)
2705         Diag(Tok, LangOpts.CPlusPlus11 ?
2706              diag::warn_cxx98_compat_variadic_macro :
2707              diag::ext_variadic_macro);
2708 
2709       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2710       if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2711         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2712       }
2713 
2714       // Lex the token after the identifier.
2715       LexUnexpandedNonComment(Tok);
2716       if (Tok.isNot(tok::r_paren)) {
2717         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2718         return true;
2719       }
2720       // Add the __VA_ARGS__ identifier as a parameter.
2721       Parameters.push_back(Ident__VA_ARGS__);
2722       MI->setIsC99Varargs();
2723       MI->setParameterList(Parameters, BP);
2724       return false;
2725     case tok::eod:  // #define X(
2726       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2727       return true;
2728     default:
2729       // Handle keywords and identifiers here to accept things like
2730       // #define Foo(for) for.
2731       IdentifierInfo *II = Tok.getIdentifierInfo();
2732       if (!II) {
2733         // #define X(1
2734         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2735         return true;
2736       }
2737 
2738       // If this is already used as a parameter, it is used multiple times (e.g.
2739       // #define X(A,A.
2740       if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2741         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2742         return true;
2743       }
2744 
2745       // Add the parameter to the macro info.
2746       Parameters.push_back(II);
2747 
2748       // Lex the token after the identifier.
2749       LexUnexpandedNonComment(Tok);
2750 
2751       switch (Tok.getKind()) {
2752       default:          // #define X(A B
2753         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2754         return true;
2755       case tok::r_paren: // #define X(A)
2756         MI->setParameterList(Parameters, BP);
2757         return false;
2758       case tok::comma:  // #define X(A,
2759         break;
2760       case tok::ellipsis:  // #define X(A... -> GCC extension
2761         // Diagnose extension.
2762         Diag(Tok, diag::ext_named_variadic_macro);
2763 
2764         // Lex the token after the identifier.
2765         LexUnexpandedNonComment(Tok);
2766         if (Tok.isNot(tok::r_paren)) {
2767           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2768           return true;
2769         }
2770 
2771         MI->setIsGNUVarargs();
2772         MI->setParameterList(Parameters, BP);
2773         return false;
2774       }
2775     }
2776   }
2777 }
2778 
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2779 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2780                                    const LangOptions &LOptions) {
2781   if (MI->getNumTokens() == 1) {
2782     const Token &Value = MI->getReplacementToken(0);
2783 
2784     // Macro that is identity, like '#define inline inline' is a valid pattern.
2785     if (MacroName.getKind() == Value.getKind())
2786       return true;
2787 
2788     // Macro that maps a keyword to the same keyword decorated with leading/
2789     // trailing underscores is a valid pattern:
2790     //    #define inline __inline
2791     //    #define inline __inline__
2792     //    #define inline _inline (in MS compatibility mode)
2793     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2794     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2795       if (!II->isKeyword(LOptions))
2796         return false;
2797       StringRef ValueText = II->getName();
2798       StringRef TrimmedValue = ValueText;
2799       if (!ValueText.starts_with("__")) {
2800         if (ValueText.starts_with("_"))
2801           TrimmedValue = TrimmedValue.drop_front(1);
2802         else
2803           return false;
2804       } else {
2805         TrimmedValue = TrimmedValue.drop_front(2);
2806         if (TrimmedValue.ends_with("__"))
2807           TrimmedValue = TrimmedValue.drop_back(2);
2808       }
2809       return TrimmedValue.equals(MacroText);
2810     } else {
2811       return false;
2812     }
2813   }
2814 
2815   // #define inline
2816   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2817                            tok::kw_const) &&
2818          MI->getNumTokens() == 0;
2819 }
2820 
2821 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2822 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2823 // doing so performs certain validity checks including (but not limited to):
2824 //   - # (stringization) is followed by a macro parameter
2825 //
2826 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2827 //  a pointer to a MacroInfo object.
2828 
MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
  // Reads the remainder of a macro definition line (the optional parameter
  // list followed by the replacement list) into a newly allocated MacroInfo.
  // Every 'return nullptr' below follows either a failed parameter-list read
  // or a diagnostic emitted right before it.

  // LastTok trails the token currently being examined; its final value
  // supplies the definition end location recorded at the bottom.
  Token LastTok = MacroNameTok;
  // Create the new macro.
  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());

  Token Tok;
  LexUnexpandedToken(Tok);

  // Ensure we consume the rest of the macro body if errors occur.
  auto _ = llvm::make_scope_exit([&]() {
    // The flag indicates if we are still waiting for 'eod'.
    if (CurLexer->ParsingPreprocessorDirective)
      DiscardUntilEndOfDirective();
  });

  // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
  // within their appropriate context.
  VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);

  // If this is a function-like macro definition, parse the argument list,
  // marking each of the identifiers as being used as macro arguments.  Also,
  // check other constraints on the first token of the macro body.
  if (Tok.is(tok::eod)) {
    if (ImmediatelyAfterHeaderGuard) {
      // Save this macro information since it may be part of a header guard.
      CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
                                        MacroNameTok.getLocation());
    }
    // If there is no body to this macro, we have no special handling here.
  } else if (Tok.hasLeadingSpace()) {
    // This is a normal token with leading space.  Clear the leading space
    // marker on the first token to get proper expansion.
    Tok.clearFlag(Token::LeadingSpace);
  } else if (Tok.is(tok::l_paren)) {
    // This is a function-like macro definition.  Read the argument list.
    MI->setIsFunctionLike();
    // A true result from ReadMacroParameterList is treated as failure.
    if (ReadMacroParameterList(MI, LastTok))
      return nullptr;

    // If this is a definition of an ISO C/C++ variadic function-like macro (not
    // using the GNU named varargs extension) inform our variadic scope guard
    // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
    // allowed only within the definition of a variadic macro.

    if (MI->isC99Varargs()) {
      VariadicMacroScopeGuard.enterScope();
    }

    // Read the first token after the arg list for down below.
    LexUnexpandedToken(Tok);
  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
    // C99 requires whitespace between the macro definition and the body.  Emit
    // a diagnostic for something like "#define X+".
    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
  } else {
    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
    // first character of a replacement list is not a character required by
    // subclause 5.2.1, then there shall be white-space separation between the
    // identifier and the replacement list.".  5.2.1 lists this set:
    //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
    // is irrelevant here.
    bool isInvalid = false;
    if (Tok.is(tok::at)) // @ is not in the list above.
      isInvalid = true;
    else if (Tok.is(tok::unknown)) {
      // If we have an unknown token, it is something strange like "`".  Since
      // all of valid characters would have lexed into a single character
      // token of some sort, we know this is not a valid case.
      isInvalid = true;
    }
    // Both outcomes are diagnosed; only the diagnostic ID differs.
    if (isInvalid)
      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
    else
      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
  }

  // Only a real body token may become the definition's last token; an
  // immediate eod leaves LastTok at its previous value.
  if (!Tok.is(tok::eod))
    LastTok = Tok;

  // Replacement-list tokens are collected here and handed to MI at the end.
  SmallVector<Token, 16> Tokens;

  // Read the rest of the macro body.
  if (MI->isObjectLike()) {
    // Object-like macros are very simple, just read their body.
    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;
      Tokens.push_back(Tok);
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }
  } else {
    // Otherwise, read the body of a function-like macro.  While we are at it,
    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
    // parameters in function-like macro expansions.

    VAOptDefinitionContext VAOCtx(*this);

    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;

      // Anything that is not '#', '#@' or '##' is an ordinary body token; the
      // only extra work is tracking __VA_OPT__ and its parentheses.
      if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
        Tokens.push_back(Tok);

        if (VAOCtx.isVAOptToken(Tok)) {
          // If we're already within a VAOPT, emit an error.
          if (VAOCtx.isInVAOpt()) {
            Diag(Tok, diag::err_pp_vaopt_nested_use);
            return nullptr;
          }
          // Ensure VAOPT is followed by a '(' .
          LexUnexpandedToken(Tok);
          if (Tok.isNot(tok::l_paren)) {
            Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
            return nullptr;
          }
          Tokens.push_back(Tok);
          VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
          // Peek at the first token inside the parentheses: a leading '##'
          // is rejected here; anything else is processed by the next
          // iteration, so it is not pushed yet.
          LexUnexpandedToken(Tok);
          if (Tok.is(tok::hashhash)) {
            Diag(Tok, diag::err_vaopt_paste_at_start);
            return nullptr;
          }
          continue;
        } else if (VAOCtx.isInVAOpt()) {
          if (Tok.is(tok::r_paren)) {
            if (VAOCtx.sawClosingParen()) {
              assert(Tokens.size() >= 3 &&
                     "Must have seen at least __VA_OPT__( "
                     "and a subsequent tok::r_paren");
              // The ')' was already pushed above, so the token before it sits
              // at index size() - 2.
              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
                Diag(Tok, diag::err_vaopt_paste_at_end);
                return nullptr;
              }
            }
          } else if (Tok.is(tok::l_paren)) {
            VAOCtx.sawOpeningParen(Tok.getLocation());
          }
        }
        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      // If we're in -traditional mode, then we should ignore stringification
      // and token pasting. Mark the tokens as unknown so as not to confuse
      // things.
      if (getLangOpts().TraditionalCPP) {
        Tok.setKind(tok::unknown);
        Tokens.push_back(Tok);

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      if (Tok.is(tok::hashhash)) {
        // If we see token pasting, check if it looks like the gcc comma
        // pasting extension.  We'll use this information to suppress
        // diagnostics later on.

        // Get the next token of the macro.
        // From here on LastTok holds the '##' and Tok the token after it.
        LexUnexpandedToken(Tok);

        if (Tok.is(tok::eod)) {
          // The trailing '##' is still recorded in the body.
          Tokens.push_back(LastTok);
          break;
        }

        // ", ## __VA_ARGS__": the token before '##' is a comma and the one
        // after it is __VA_ARGS__.
        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
            Tokens[Tokens.size() - 1].is(tok::comma))
          MI->setHasCommaPasting();

        // Things look ok, add the '##' token to the macro.
        // Tok already holds the following token, so loop without re-lexing.
        Tokens.push_back(LastTok);
        continue;
      }

      // Our Token is a stringization operator.
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);

      // Check for a valid macro arg identifier or __VA_OPT__.
      // A getParameterNum() result of -1 is handled as "not a parameter".
      if (!VAOCtx.isVAOptToken(Tok) &&
          (Tok.getIdentifierInfo() == nullptr ||
           MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {

        // If this is assembler-with-cpp mode, we accept random gibberish after
        // the '#' because '#' is often a comment character.  However, change
        // the kind of the token to tok::unknown so that the preprocessor isn't
        // confused.
        if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
          LastTok.setKind(tok::unknown);
          Tokens.push_back(LastTok);
          continue;
        } else {
          // The streamed flag tells the diagnostic whether the operator was
          // '#@' rather than '#'.
          Diag(Tok, diag::err_pp_stringize_not_parameter)
            << LastTok.is(tok::hashat);
          return nullptr;
        }
      }

      // Things look ok, add the '#' and param name tokens to the macro.
      Tokens.push_back(LastTok);

      // If the token following '#' is VAOPT, let the next iteration handle it
      // and check it for correctness, otherwise add the token and prime the
      // loop with the next one.
      if (!VAOCtx.isVAOptToken(Tok)) {
        Tokens.push_back(Tok);
        LastTok = Tok;

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
      }
    }
    // Reaching the end of the directive while still inside __VA_OPT__( means
    // its closing ')' is missing.
    if (VAOCtx.isInVAOpt()) {
      assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
      Diag(Tok, diag::err_pp_expected_after)
        << LastTok.getKind() << tok::r_paren;
      Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
      return nullptr;
    }
  }
  MI->setDefinitionEndLoc(LastTok.getLocation());

  MI->setTokens(Tokens, BP);
  return MI;
}
3059 
isObjCProtectedMacro(const IdentifierInfo * II)3060 static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3061   return II->isStr("__strong") || II->isStr("__weak") ||
3062          II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3063 }
3064 
3065 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3066 /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3067 void Preprocessor::HandleDefineDirective(
3068     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3069   ++NumDefined;
3070 
3071   Token MacroNameTok;
3072   bool MacroShadowsKeyword;
3073   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3074 
3075   // Error reading macro name?  If so, diagnostic already issued.
3076   if (MacroNameTok.is(tok::eod))
3077     return;
3078 
3079   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3080   // Issue a final pragma warning if we're defining a macro that was has been
3081   // undefined and is being redefined.
3082   if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3083     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3084 
3085   // If we are supposed to keep comments in #defines, reenable comment saving
3086   // mode.
3087   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3088 
3089   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3090       MacroNameTok, ImmediatelyAfterHeaderGuard);
3091 
3092   if (!MI) return;
3093 
3094   if (MacroShadowsKeyword &&
3095       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3096     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3097   }
3098   // Check that there is no paste (##) operator at the beginning or end of the
3099   // replacement list.
3100   unsigned NumTokens = MI->getNumTokens();
3101   if (NumTokens != 0) {
3102     if (MI->getReplacementToken(0).is(tok::hashhash)) {
3103       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3104       return;
3105     }
3106     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3107       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3108       return;
3109     }
3110   }
3111 
3112   // When skipping just warn about macros that do not match.
3113   if (SkippingUntilPCHThroughHeader) {
3114     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3115     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3116                              /*Syntactic=*/LangOpts.MicrosoftExt))
3117       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3118           << MacroNameTok.getIdentifierInfo();
3119     // Issue the diagnostic but allow the change if msvc extensions are enabled
3120     if (!LangOpts.MicrosoftExt)
3121       return;
3122   }
3123 
3124   // Finally, if this identifier already had a macro defined for it, verify that
3125   // the macro bodies are identical, and issue diagnostics if they are not.
3126   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3127     // Final macros are hard-mode: they always warn. Even if the bodies are
3128     // identical. Even if they are in system headers. Even if they are things we
3129     // would silently allow in the past.
3130     if (MacroNameTok.getIdentifierInfo()->isFinal())
3131       emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3132 
3133     // In Objective-C, ignore attempts to directly redefine the builtin
3134     // definitions of the ownership qualifiers.  It's still possible to
3135     // #undef them.
3136     if (getLangOpts().ObjC &&
3137         SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3138             getPredefinesFileID() &&
3139         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3140       // Warn if it changes the tokens.
3141       if ((!getDiagnostics().getSuppressSystemWarnings() ||
3142            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3143           !MI->isIdenticalTo(*OtherMI, *this,
3144                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
3145         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3146       }
3147       assert(!OtherMI->isWarnIfUnused());
3148       return;
3149     }
3150 
3151     // It is very common for system headers to have tons of macro redefinitions
3152     // and for warnings to be disabled in system headers.  If this is the case,
3153     // then don't bother calling MacroInfo::isIdenticalTo.
3154     if (!getDiagnostics().getSuppressSystemWarnings() ||
3155         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3156 
3157       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3158         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3159 
3160       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3161       // C++ [cpp.predefined]p4, but allow it as an extension.
3162       if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3163         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3164       // Macros must be identical.  This means all tokens and whitespace
3165       // separation must be the same.  C99 6.10.3p2.
3166       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3167                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3168         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3169           << MacroNameTok.getIdentifierInfo();
3170         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3171       }
3172     }
3173     if (OtherMI->isWarnIfUnused())
3174       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3175   }
3176 
3177   DefMacroDirective *MD =
3178       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3179 
3180   assert(!MI->isUsed());
3181   // If we need warning for not using the macro, add its location in the
3182   // warn-because-unused-macro set. If it gets used it will be removed from set.
3183   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3184       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3185       !MacroExpansionInDirectivesOverride &&
3186       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3187           getPredefinesFileID()) {
3188     MI->setIsWarnIfUnused(true);
3189     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3190   }
3191 
3192   // If the callbacks want to know, tell them about the macro definition.
3193   if (Callbacks)
3194     Callbacks->MacroDefined(MacroNameTok, MD);
3195 
3196   // If we're in MS compatibility mode and the macro being defined is the
3197   // assert macro, implicitly add a macro definition for static_assert to work
3198   // around their broken assert.h header file in C. Only do so if there isn't
3199   // already a static_assert macro defined.
3200   if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3201       MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3202       !isMacroDefined("static_assert")) {
3203     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3204 
3205     Token Tok;
3206     Tok.startToken();
3207     Tok.setKind(tok::kw__Static_assert);
3208     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3209     MI->setTokens({Tok}, BP);
3210     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3211   }
3212 }
3213 
3214 /// HandleUndefDirective - Implements \#undef.
3215 ///
HandleUndefDirective()3216 void Preprocessor::HandleUndefDirective() {
3217   ++NumUndefined;
3218 
3219   Token MacroNameTok;
3220   ReadMacroName(MacroNameTok, MU_Undef);
3221 
3222   // Error reading macro name?  If so, diagnostic already issued.
3223   if (MacroNameTok.is(tok::eod))
3224     return;
3225 
3226   // Check to see if this is the last token on the #undef line.
3227   CheckEndOfDirective("undef");
3228 
3229   // Okay, we have a valid identifier to undef.
3230   auto *II = MacroNameTok.getIdentifierInfo();
3231   auto MD = getMacroDefinition(II);
3232   UndefMacroDirective *Undef = nullptr;
3233 
3234   if (II->isFinal())
3235     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3236 
3237   // If the macro is not defined, this is a noop undef.
3238   if (const MacroInfo *MI = MD.getMacroInfo()) {
3239     if (!MI->isUsed() && MI->isWarnIfUnused())
3240       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3241 
3242     // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3243     // C++ [cpp.predefined]p4, but allow it as an extension.
3244     if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3245       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3246 
3247     if (MI->isWarnIfUnused())
3248       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3249 
3250     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3251   }
3252 
3253   // If the callbacks want to know, tell them about the macro #undef.
3254   // Note: no matter if the macro was defined or not.
3255   if (Callbacks)
3256     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3257 
3258   if (Undef)
3259     appendMacroDirective(II, Undef);
3260 }
3261 
3262 //===----------------------------------------------------------------------===//
3263 // Preprocessor Conditional Directive Handling.
3264 //===----------------------------------------------------------------------===//
3265 
3266 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3267 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3268 /// true if any tokens have been returned or pp-directives activated before this
3269 /// \#ifndef has been lexed.
3270 ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3271 void Preprocessor::HandleIfdefDirective(Token &Result,
3272                                         const Token &HashToken,
3273                                         bool isIfndef,
3274                                         bool ReadAnyTokensBeforeDirective) {
3275   ++NumIf;
3276   Token DirectiveTok = Result;
3277 
3278   Token MacroNameTok;
3279   ReadMacroName(MacroNameTok);
3280 
3281   // Error reading macro name?  If so, diagnostic already issued.
3282   if (MacroNameTok.is(tok::eod)) {
3283     // Skip code until we get to #endif.  This helps with recovery by not
3284     // emitting an error when the #endif is reached.
3285     SkipExcludedConditionalBlock(HashToken.getLocation(),
3286                                  DirectiveTok.getLocation(),
3287                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
3288     return;
3289   }
3290 
3291   emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3292 
3293   // Check to see if this is the last token on the #if[n]def line.
3294   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3295 
3296   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3297   auto MD = getMacroDefinition(MII);
3298   MacroInfo *MI = MD.getMacroInfo();
3299 
3300   if (CurPPLexer->getConditionalStackDepth() == 0) {
3301     // If the start of a top-level #ifdef and if the macro is not defined,
3302     // inform MIOpt that this might be the start of a proper include guard.
3303     // Otherwise it is some other form of unknown conditional which we can't
3304     // handle.
3305     if (!ReadAnyTokensBeforeDirective && !MI) {
3306       assert(isIfndef && "#ifdef shouldn't reach here");
3307       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3308     } else
3309       CurPPLexer->MIOpt.EnterTopLevelConditional();
3310   }
3311 
3312   // If there is a macro, process it.
3313   if (MI)  // Mark it used.
3314     markMacroAsUsed(MI);
3315 
3316   if (Callbacks) {
3317     if (isIfndef)
3318       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3319     else
3320       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3321   }
3322 
3323   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3324     getSourceManager().isInMainFile(DirectiveTok.getLocation());
3325 
3326   // Should we include the stuff contained by this directive?
3327   if (PPOpts->SingleFileParseMode && !MI) {
3328     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3329     // the directive blocks.
3330     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3331                                      /*wasskip*/false, /*foundnonskip*/false,
3332                                      /*foundelse*/false);
3333   } else if (!MI == isIfndef || RetainExcludedCB) {
3334     // Yes, remember that we are inside a conditional, then lex the next token.
3335     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3336                                      /*wasskip*/false, /*foundnonskip*/true,
3337                                      /*foundelse*/false);
3338   } else {
3339     // No, skip the contents of this block.
3340     SkipExcludedConditionalBlock(HashToken.getLocation(),
3341                                  DirectiveTok.getLocation(),
3342                                  /*Foundnonskip*/ false,
3343                                  /*FoundElse*/ false);
3344   }
3345 }
3346 
3347 /// HandleIfDirective - Implements the \#if directive.
3348 ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3349 void Preprocessor::HandleIfDirective(Token &IfToken,
3350                                      const Token &HashToken,
3351                                      bool ReadAnyTokensBeforeDirective) {
3352   ++NumIf;
3353 
3354   // Parse and evaluate the conditional expression.
3355   IdentifierInfo *IfNDefMacro = nullptr;
3356   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3357   const bool ConditionalTrue = DER.Conditional;
3358   // Lexer might become invalid if we hit code completion point while evaluating
3359   // expression.
3360   if (!CurPPLexer)
3361     return;
3362 
3363   // If this condition is equivalent to #ifndef X, and if this is the first
3364   // directive seen, handle it for the multiple-include optimization.
3365   if (CurPPLexer->getConditionalStackDepth() == 0) {
3366     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3367       // FIXME: Pass in the location of the macro name, not the 'if' token.
3368       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3369     else
3370       CurPPLexer->MIOpt.EnterTopLevelConditional();
3371   }
3372 
3373   if (Callbacks)
3374     Callbacks->If(
3375         IfToken.getLocation(), DER.ExprRange,
3376         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3377 
3378   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3379     getSourceManager().isInMainFile(IfToken.getLocation());
3380 
3381   // Should we include the stuff contained by this directive?
3382   if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3383     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3384     // the directive blocks.
3385     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3386                                      /*foundnonskip*/false, /*foundelse*/false);
3387   } else if (ConditionalTrue || RetainExcludedCB) {
3388     // Yes, remember that we are inside a conditional, then lex the next token.
3389     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3390                                    /*foundnonskip*/true, /*foundelse*/false);
3391   } else {
3392     // No, skip the contents of this block.
3393     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3394                                  /*Foundnonskip*/ false,
3395                                  /*FoundElse*/ false);
3396   }
3397 }
3398 
3399 /// HandleEndifDirective - Implements the \#endif directive.
3400 ///
HandleEndifDirective(Token & EndifToken)3401 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3402   ++NumEndif;
3403 
3404   // Check that this is the whole directive.
3405   CheckEndOfDirective("endif");
3406 
3407   PPConditionalInfo CondInfo;
3408   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3409     // No conditionals on the stack: this is an #endif without an #if.
3410     Diag(EndifToken, diag::err_pp_endif_without_if);
3411     return;
3412   }
3413 
3414   // If this the end of a top-level #endif, inform MIOpt.
3415   if (CurPPLexer->getConditionalStackDepth() == 0)
3416     CurPPLexer->MIOpt.ExitTopLevelConditional();
3417 
3418   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3419          "This code should only be reachable in the non-skipping case!");
3420 
3421   if (Callbacks)
3422     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3423 }
3424 
3425 /// HandleElseDirective - Implements the \#else directive.
3426 ///
HandleElseDirective(Token & Result,const Token & HashToken)3427 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3428   ++NumElse;
3429 
3430   // #else directive in a non-skipping conditional... start skipping.
3431   CheckEndOfDirective("else");
3432 
3433   PPConditionalInfo CI;
3434   if (CurPPLexer->popConditionalLevel(CI)) {
3435     Diag(Result, diag::pp_err_else_without_if);
3436     return;
3437   }
3438 
3439   // If this is a top-level #else, inform the MIOpt.
3440   if (CurPPLexer->getConditionalStackDepth() == 0)
3441     CurPPLexer->MIOpt.EnterTopLevelConditional();
3442 
3443   // If this is a #else with a #else before it, report the error.
3444   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3445 
3446   if (Callbacks)
3447     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3448 
3449   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3450     getSourceManager().isInMainFile(Result.getLocation());
3451 
3452   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3453     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3454     // the directive blocks.
3455     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3456                                      /*foundnonskip*/false, /*foundelse*/true);
3457     return;
3458   }
3459 
3460   // Finally, skip the rest of the contents of this block.
3461   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3462                                /*Foundnonskip*/ true,
3463                                /*FoundElse*/ true, Result.getLocation());
3464 }
3465 
3466 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3467 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3468                                              const Token &HashToken,
3469                                              tok::PPKeywordKind Kind) {
3470   PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3471                        : Kind == tok::pp_elifdef ? PED_Elifdef
3472                                                  : PED_Elifndef;
3473   ++NumElse;
3474 
3475   // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3476   switch (DirKind) {
3477   case PED_Elifdef:
3478   case PED_Elifndef:
3479     unsigned DiagID;
3480     if (LangOpts.CPlusPlus)
3481       DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3482                                     : diag::ext_cxx23_pp_directive;
3483     else
3484       DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3485                             : diag::ext_c23_pp_directive;
3486     Diag(ElifToken, DiagID) << DirKind;
3487     break;
3488   default:
3489     break;
3490   }
3491 
3492   // #elif directive in a non-skipping conditional... start skipping.
3493   // We don't care what the condition is, because we will always skip it (since
3494   // the block immediately before it was included).
3495   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3496 
3497   PPConditionalInfo CI;
3498   if (CurPPLexer->popConditionalLevel(CI)) {
3499     Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3500     return;
3501   }
3502 
3503   // If this is a top-level #elif, inform the MIOpt.
3504   if (CurPPLexer->getConditionalStackDepth() == 0)
3505     CurPPLexer->MIOpt.EnterTopLevelConditional();
3506 
3507   // If this is a #elif with a #else before it, report the error.
3508   if (CI.FoundElse)
3509     Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3510 
3511   if (Callbacks) {
3512     switch (Kind) {
3513     case tok::pp_elif:
3514       Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3515                       PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3516       break;
3517     case tok::pp_elifdef:
3518       Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3519       break;
3520     case tok::pp_elifndef:
3521       Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3522       break;
3523     default:
3524       assert(false && "unexpected directive kind");
3525       break;
3526     }
3527   }
3528 
3529   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3530     getSourceManager().isInMainFile(ElifToken.getLocation());
3531 
3532   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3533     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3534     // the directive blocks.
3535     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3536                                      /*foundnonskip*/false, /*foundelse*/false);
3537     return;
3538   }
3539 
3540   // Finally, skip the rest of the contents of this block.
3541   SkipExcludedConditionalBlock(
3542       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3543       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3544 }
3545