1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/DirectoryEntry.h"
16 #include "clang/Basic/FileManager.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/Module.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TokenKinds.h"
23 #include "clang/Lex/CodeCompletionHandler.h"
24 #include "clang/Lex/HeaderSearch.h"
25 #include "clang/Lex/HeaderSearchOptions.h"
26 #include "clang/Lex/LexDiagnostic.h"
27 #include "clang/Lex/LiteralSupport.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/Pragma.h"
33 #include "clang/Lex/Preprocessor.h"
34 #include "clang/Lex/PreprocessorOptions.h"
35 #include "clang/Lex/Token.h"
36 #include "clang/Lex/VariadicMacroSupport.h"
37 #include "llvm/ADT/ArrayRef.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/ScopeExit.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/ADT/StringSwitch.h"
44 #include "llvm/Support/AlignOf.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/SaveAndRestore.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <cstring>
51 #include <new>
52 #include <optional>
53 #include <string>
54 #include <utility>
55
56 using namespace clang;
57
58 //===----------------------------------------------------------------------===//
59 // Utility Methods for Preprocessor Directive Handling.
60 //===----------------------------------------------------------------------===//
61
AllocateMacroInfo(SourceLocation L)62 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
63 static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
64 return new (BP) MacroInfo(L);
65 }
66
AllocateDefMacroDirective(MacroInfo * MI,SourceLocation Loc)67 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
68 SourceLocation Loc) {
69 return new (BP) DefMacroDirective(MI, Loc);
70 }
71
72 UndefMacroDirective *
AllocateUndefMacroDirective(SourceLocation UndefLoc)73 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
74 return new (BP) UndefMacroDirective(UndefLoc);
75 }
76
77 VisibilityMacroDirective *
AllocateVisibilityMacroDirective(SourceLocation Loc,bool isPublic)78 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
79 bool isPublic) {
80 return new (BP) VisibilityMacroDirective(Loc, isPublic);
81 }
82
83 /// Read and discard all tokens remaining on the current line until
84 /// the tok::eod token is found.
DiscardUntilEndOfDirective()85 SourceRange Preprocessor::DiscardUntilEndOfDirective() {
86 Token Tmp;
87 SourceRange Res;
88
89 LexUnexpandedToken(Tmp);
90 Res.setBegin(Tmp.getLocation());
91 while (Tmp.isNot(tok::eod)) {
92 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
93 LexUnexpandedToken(Tmp);
94 }
95 Res.setEnd(Tmp.getLocation());
96 return Res;
97 }
98
/// Enumerates the possible outcomes of checking whether a \#define or \#undef
/// targets a reserved identifier or a keyword.
enum MacroDiag {
  MD_NoWarn,        ///< Not a reserved identifier
  MD_KeywordDef,    ///< Macro hides keyword, enabled by default
  MD_ReservedMacro  ///< #define or #undef of a reserved id, disabled by default
};
105
/// Enumerates possible %select values for the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics.
///
/// The enumerators are streamed directly into those diagnostics, so their
/// numeric values select which directive spelling is reported.
enum PPElifDiag {
  PED_Elif,    ///< The directive is \#elif.
  PED_Elifdef, ///< The directive is \#elifdef.
  PED_Elifndef ///< The directive is \#elifndef.
};
113
isFeatureTestMacro(StringRef MacroName)114 static bool isFeatureTestMacro(StringRef MacroName) {
115 // list from:
116 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118 // * man 7 feature_test_macros
119 // The list must be sorted for correct binary search.
120 static constexpr StringRef ReservedMacro[] = {
121 "_ATFILE_SOURCE",
122 "_BSD_SOURCE",
123 "_CRT_NONSTDC_NO_WARNINGS",
124 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125 "_CRT_SECURE_NO_WARNINGS",
126 "_FILE_OFFSET_BITS",
127 "_FORTIFY_SOURCE",
128 "_GLIBCXX_ASSERTIONS",
129 "_GLIBCXX_CONCEPT_CHECKS",
130 "_GLIBCXX_DEBUG",
131 "_GLIBCXX_DEBUG_PEDANTIC",
132 "_GLIBCXX_PARALLEL",
133 "_GLIBCXX_PARALLEL_ASSERTIONS",
134 "_GLIBCXX_SANITIZE_VECTOR",
135 "_GLIBCXX_USE_CXX11_ABI",
136 "_GLIBCXX_USE_DEPRECATED",
137 "_GNU_SOURCE",
138 "_ISOC11_SOURCE",
139 "_ISOC95_SOURCE",
140 "_ISOC99_SOURCE",
141 "_LARGEFILE64_SOURCE",
142 "_POSIX_C_SOURCE",
143 "_REENTRANT",
144 "_SVID_SOURCE",
145 "_THREAD_SAFE",
146 "_XOPEN_SOURCE",
147 "_XOPEN_SOURCE_EXTENDED",
148 "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149 "__STDC_FORMAT_MACROS",
150 };
151 return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
152 MacroName);
153 }
154
isLanguageDefinedBuiltin(const SourceManager & SourceMgr,const MacroInfo * MI,const StringRef MacroName)155 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
156 const MacroInfo *MI,
157 const StringRef MacroName) {
158 // If this is a macro with special handling (like __LINE__) then it's language
159 // defined.
160 if (MI->isBuiltinMacro())
161 return true;
162 // Builtin macros are defined in the builtin file
163 if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
164 return false;
165 // C defines macros starting with __STDC, and C++ defines macros starting with
166 // __STDCPP
167 if (MacroName.starts_with("__STDC"))
168 return true;
169 // C++ defines the __cplusplus macro
170 if (MacroName == "__cplusplus")
171 return true;
172 // C++ defines various feature-test macros starting with __cpp
173 if (MacroName.starts_with("__cpp"))
174 return true;
175 // Anything else isn't language-defined
176 return false;
177 }
178
shouldWarnOnMacroDef(Preprocessor & PP,IdentifierInfo * II)179 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
180 const LangOptions &Lang = PP.getLangOpts();
181 StringRef Text = II->getName();
182 if (isReservedInAllContexts(II->isReserved(Lang)))
183 return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
184 if (II->isKeyword(Lang))
185 return MD_KeywordDef;
186 if (Lang.CPlusPlus11 && (Text.equals("override") || Text.equals("final")))
187 return MD_KeywordDef;
188 return MD_NoWarn;
189 }
190
shouldWarnOnMacroUndef(Preprocessor & PP,IdentifierInfo * II)191 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
192 const LangOptions &Lang = PP.getLangOpts();
193 // Do not warn on keyword undef. It is generally harmless and widely used.
194 if (isReservedInAllContexts(II->isReserved(Lang)))
195 return MD_ReservedMacro;
196 return MD_NoWarn;
197 }
198
199 // Return true if we want to issue a diagnostic by default if we
200 // encounter this name in a #include with the wrong case. For now,
201 // this includes the standard C and C++ headers, Posix headers,
202 // and Boost headers. Improper case for these #includes is a
203 // potential portability issue.
warnByDefaultOnWrongCase(StringRef Include)204 static bool warnByDefaultOnWrongCase(StringRef Include) {
205 // If the first component of the path is "boost", treat this like a standard header
206 // for the purposes of diagnostics.
207 if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
208 return true;
209
210 // "condition_variable" is the longest standard header name at 18 characters.
211 // If the include file name is longer than that, it can't be a standard header.
212 static const size_t MaxStdHeaderNameLen = 18u;
213 if (Include.size() > MaxStdHeaderNameLen)
214 return false;
215
216 // Lowercase and normalize the search string.
217 SmallString<32> LowerInclude{Include};
218 for (char &Ch : LowerInclude) {
219 // In the ASCII range?
220 if (static_cast<unsigned char>(Ch) > 0x7f)
221 return false; // Can't be a standard header
222 // ASCII lowercase:
223 if (Ch >= 'A' && Ch <= 'Z')
224 Ch += 'a' - 'A';
225 // Normalize path separators for comparison purposes.
226 else if (::llvm::sys::path::is_separator(Ch))
227 Ch = '/';
228 }
229
230 // The standard C/C++ and Posix headers
231 return llvm::StringSwitch<bool>(LowerInclude)
232 // C library headers
233 .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
234 .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
235 .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
236 .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
237 .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
238 .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
239 .Cases("wchar.h", "wctype.h", true)
240
241 // C++ headers for C library facilities
242 .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
243 .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
244 .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
245 .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
246 .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
247 .Case("cwctype", true)
248
249 // C++ library headers
250 .Cases("algorithm", "fstream", "list", "regex", "thread", true)
251 .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
252 .Cases("atomic", "future", "map", "set", "type_traits", true)
253 .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
254 .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
255 .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
256 .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
257 .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
258 .Cases("deque", "istream", "queue", "string", "valarray", true)
259 .Cases("exception", "iterator", "random", "strstream", "vector", true)
260 .Cases("forward_list", "limits", "ratio", "system_error", true)
261
262 // POSIX headers (which aren't also C headers)
263 .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
264 .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
265 .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
266 .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
267 .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
268 .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
269 .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
270 .Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
271 .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
272 .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
273 .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
274 .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
275 .Default(false);
276 }
277
278 /// Find a similar string in `Candidates`.
279 ///
280 /// \param LHS a string for a similar string in `Candidates`
281 ///
282 /// \param Candidates the candidates to find a similar string.
283 ///
284 /// \returns a similar string if exists. If no similar string exists,
285 /// returns std::nullopt.
286 static std::optional<StringRef>
findSimilarStr(StringRef LHS,const std::vector<StringRef> & Candidates)287 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
288 // We need to check if `Candidates` has the exact case-insensitive string
289 // because the Levenshtein distance match does not care about it.
290 for (StringRef C : Candidates) {
291 if (LHS.equals_insensitive(C)) {
292 return C;
293 }
294 }
295
296 // Keep going with the Levenshtein distance match.
297 // If the LHS size is less than 3, use the LHS size minus 1 and if not,
298 // use the LHS size divided by 3.
299 size_t Length = LHS.size();
300 size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
301
302 std::optional<std::pair<StringRef, size_t>> SimilarStr;
303 for (StringRef C : Candidates) {
304 size_t CurDist = LHS.edit_distance(C, true);
305 if (CurDist <= MaxDist) {
306 if (!SimilarStr) {
307 // The first similar string found.
308 SimilarStr = {C, CurDist};
309 } else if (CurDist < SimilarStr->second) {
310 // More similar string found.
311 SimilarStr = {C, CurDist};
312 }
313 }
314 }
315
316 if (SimilarStr) {
317 return SimilarStr->first;
318 } else {
319 return std::nullopt;
320 }
321 }
322
CheckMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)323 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
324 bool *ShadowFlag) {
325 // Missing macro name?
326 if (MacroNameTok.is(tok::eod))
327 return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
328
329 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
330 if (!II)
331 return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
332
333 if (II->isCPlusPlusOperatorKeyword()) {
334 // C++ 2.5p2: Alternative tokens behave the same as its primary token
335 // except for their spellings.
336 Diag(MacroNameTok, getLangOpts().MicrosoftExt
337 ? diag::ext_pp_operator_used_as_macro_name
338 : diag::err_pp_operator_used_as_macro_name)
339 << II << MacroNameTok.getKind();
340 // Allow #defining |and| and friends for Microsoft compatibility or
341 // recovery when legacy C headers are included in C++.
342 }
343
344 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
345 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
346 return Diag(MacroNameTok, diag::err_defined_macro_name);
347 }
348
349 // If defining/undefining reserved identifier or a keyword, we need to issue
350 // a warning.
351 SourceLocation MacroNameLoc = MacroNameTok.getLocation();
352 if (ShadowFlag)
353 *ShadowFlag = false;
354 if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
355 (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
356 MacroDiag D = MD_NoWarn;
357 if (isDefineUndef == MU_Define) {
358 D = shouldWarnOnMacroDef(*this, II);
359 }
360 else if (isDefineUndef == MU_Undef)
361 D = shouldWarnOnMacroUndef(*this, II);
362 if (D == MD_KeywordDef) {
363 // We do not want to warn on some patterns widely used in configuration
364 // scripts. This requires analyzing next tokens, so do not issue warnings
365 // now, only inform caller.
366 if (ShadowFlag)
367 *ShadowFlag = true;
368 }
369 if (D == MD_ReservedMacro)
370 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
371 }
372
373 // Okay, we got a good identifier.
374 return false;
375 }
376
377 /// Lex and validate a macro name, which occurs after a
378 /// \#define or \#undef.
379 ///
380 /// This sets the token kind to eod and discards the rest of the macro line if
381 /// the macro name is invalid.
382 ///
383 /// \param MacroNameTok Token that is expected to be a macro name.
384 /// \param isDefineUndef Context in which macro is used.
385 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
ReadMacroName(Token & MacroNameTok,MacroUse isDefineUndef,bool * ShadowFlag)386 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
387 bool *ShadowFlag) {
388 // Read the token, don't allow macro expansion on it.
389 LexUnexpandedToken(MacroNameTok);
390
391 if (MacroNameTok.is(tok::code_completion)) {
392 if (CodeComplete)
393 CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
394 setCodeCompletionReached();
395 LexUnexpandedToken(MacroNameTok);
396 }
397
398 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
399 return;
400
401 // Invalid macro name, read and discard the rest of the line and set the
402 // token kind to tok::eod if necessary.
403 if (MacroNameTok.isNot(tok::eod)) {
404 MacroNameTok.setKind(tok::eod);
405 DiscardUntilEndOfDirective();
406 }
407 }
408
409 /// Ensure that the next token is a tok::eod token.
410 ///
411 /// If not, emit a diagnostic and consume up until the eod. If EnableMacros is
412 /// true, then we consider macros that expand to zero tokens as being ok.
413 ///
414 /// Returns the location of the end of the directive.
CheckEndOfDirective(const char * DirType,bool EnableMacros)415 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
416 bool EnableMacros) {
417 Token Tmp;
418 // Lex unexpanded tokens for most directives: macros might expand to zero
419 // tokens, causing us to miss diagnosing invalid lines. Some directives (like
420 // #line) allow empty macros.
421 if (EnableMacros)
422 Lex(Tmp);
423 else
424 LexUnexpandedToken(Tmp);
425
426 // There should be no tokens after the directive, but we allow them as an
427 // extension.
428 while (Tmp.is(tok::comment)) // Skip comments in -C mode.
429 LexUnexpandedToken(Tmp);
430
431 if (Tmp.is(tok::eod))
432 return Tmp.getLocation();
433
434 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,
435 // or if this is a macro-style preprocessing directive, because it is more
436 // trouble than it is worth to insert /**/ and check that there is no /**/
437 // in the range also.
438 FixItHint Hint;
439 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
440 !CurTokenLexer)
441 Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
442 Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
443 return DiscardUntilEndOfDirective().getEnd();
444 }
445
SuggestTypoedDirective(const Token & Tok,StringRef Directive) const446 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
447 StringRef Directive) const {
448 // If this is a `.S` file, treat unknown # directives as non-preprocessor
449 // directives.
450 if (getLangOpts().AsmPreprocessor) return;
451
452 std::vector<StringRef> Candidates = {
453 "if", "ifdef", "ifndef", "elif", "else", "endif"
454 };
455 if (LangOpts.C23 || LangOpts.CPlusPlus23)
456 Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
457
458 if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
459 // Directive cannot be coming from macro.
460 assert(Tok.getLocation().isFileID());
461 CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
462 Tok.getLocation(),
463 Tok.getLocation().getLocWithOffset(Directive.size()));
464 StringRef SuggValue = *Sugg;
465
466 auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
467 Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
468 }
469 }
470
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
/// decided that the subsequent tokens are in the \#if'd out portion of the
/// file.  Lex the rest of the file, until we see an \#endif.  If
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered.
/// If FoundElse is true, then a \#else has already been seen for this
/// conditional, so a further \#else directive is a duplicate.  When this
/// returns, the caller can lex the first valid token.
///
/// \param HashTokenLoc start of the range reported to
///        PPCallbacks::SourceRangeSkipped; also saved in the preamble skip
///        info if EOF is hit while recording a preamble.
/// \param IfTokenLoc location pushed as the new conditional level.
/// \param FoundNonSkipPortion initial "found non-skipped portion" state of the
///        pushed conditional level.
/// \param FoundElse initial "found \#else" state of the pushed conditional
///        level.
/// \param ElseLoc only forwarded into the preamble skip info in this
///        function.
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                                SourceLocation IfTokenLoc,
                                                bool FoundNonSkipPortion,
                                                bool FoundElse,
                                                SourceLocation ElseLoc) {
  // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
  // not getting called recursively by storing the RecordedSkippedRanges
  // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
  // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
  // invalidated. If this changes and there is a need to call
  // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
  // change to do a second lookup in endLexPass function instead of reusing the
  // lookup pointer.
  assert(!SkippingExcludedConditionalBlock &&
         "calling SkipExcludedConditionalBlock recursively");
  llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);

  ++NumSkipped;
  assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
  assert(CurPPLexer && "Conditional PP block must be in a file!");
  assert(CurLexer && "Conditional PP block but no current lexer set!");

  // If an earlier skip ran into EOF (per PreambleConditionalStack), clear that
  // saved skip info instead of pushing a new conditional level.
  if (PreambleConditionalStack.reachedEOFWhileSkipping())
    PreambleConditionalStack.clearSkipInfo();
  else
    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
                                     FoundNonSkipPortion, FoundElse);

  // Enter raw mode to disable identifier lookup (and thus macro expansion),
  // disabling warnings, etc.
  CurPPLexer->LexingRawMode = true;
  Token Tok;
  // Set by CheckEndOfDirective when skipping stops at a #endif or #else;
  // otherwise left invalid.
  SourceLocation endLoc;

  /// Keeps track and caches skipped ranges and also retrieves a prior skipped
  /// range if the same block is re-visited.
  struct SkippingRangeStateTy {
    Preprocessor &PP;

    // Buffer position at which the current skipping pass started; null while
    // no pass is in progress.
    const char *BeginPtr = nullptr;
    // Entry of PP.RecordedSkippedRanges keyed by BeginPtr. It holds the
    // distance from BeginPtr to the '#' that ended the pass, or 0 if no
    // distance has been recorded yet.
    unsigned *SkipRangePtr = nullptr;

    SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}

    // Start a skipping pass unless one is already in progress.
    void beginLexPass() {
      if (BeginPtr)
        return; // continue skipping a block.

      // Initiate a skipping block and adjust the lexer if we already skipped it
      // before.
      BeginPtr = PP.CurLexer->getBufferLocation();
      SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
      if (*SkipRangePtr) {
        PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
                          /*IsAtStartOfLine*/ true);
      }
    }

    // Finish the current skipping pass; Hashptr points at the '#' of the
    // directive that ended it.
    void endLexPass(const char *Hashptr) {
      if (!BeginPtr) {
        // Not doing normal lexing.
        assert(PP.CurLexer->isDependencyDirectivesLexer());
        return;
      }

      // Finished skipping a block, record the range if it's first time visited.
      if (!*SkipRangePtr) {
        *SkipRangePtr = Hashptr - BeginPtr;
      }
      // A revisited block must end at the same place as before.
      assert(*SkipRangePtr == Hashptr - BeginPtr);
      BeginPtr = nullptr;
      SkipRangePtr = nullptr;
    }
  } SkippingRangeState(*this);

  // Each iteration finds the next directive (or EOF) and handles it. The loop
  // is left via `break` once skipping is over.
  while (true) {
    if (CurLexer->isDependencyDirectivesLexer()) {
      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
    } else {
      SkippingRangeState.beginLexPass();
      // Lex raw tokens until a '#' at the start of a line, or EOF.
      while (true) {
        CurLexer->Lex(Tok);

        if (Tok.is(tok::code_completion)) {
          setCodeCompletionReached();
          if (CodeComplete)
            CodeComplete->CodeCompleteInConditionalExclusion();
          continue;
        }

        // If this is the end of the buffer, we have an error.
        if (Tok.is(tok::eof)) {
          // We don't emit errors for unterminated conditionals here,
          // Lexer::LexEndOfFile can do that properly.
          // Just return and let the caller lex after this #include.
          if (PreambleConditionalStack.isRecording())
            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
                                                      FoundNonSkipPortion,
                                                      FoundElse, ElseLoc);
          break;
        }

        // If this token is not a preprocessor directive, just skip it.
        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
          continue;

        break;
      }
    }
    if (Tok.is(tok::eof))
      break;

    // We just parsed a # character at the start of a line, so we're in
    // directive mode.  Tell the lexer this so any newlines we see will be
    // converted into an EOD token (this terminates the directive).
    CurPPLexer->ParsingPreprocessorDirective = true;
    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);

    assert(Tok.is(tok::hash));
    // Buffer position of the '#', later handed to endLexPass().
    const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
    assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());

    // Read the next token, the directive flavor.
    LexUnexpandedToken(Tok);

    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
    // something bogus), skip it.
    if (Tok.isNot(tok::raw_identifier)) {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // If the first letter isn't i or e, it isn't interesting to us.  We know
    // that this is safe in the face of spelling differences, because there is
    // no way to spell an i/e in a strange way that is another letter.  Skipping
    // this allows us to avoid looking up the identifier info for #define/#undef
    // and other common directives.
    StringRef RI = Tok.getRawIdentifier();

    char FirstChar = RI[0];
    if (FirstChar >= 'a' && FirstChar <= 'z' &&
        FirstChar != 'i' && FirstChar != 'e') {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // Get the identifier name without trigraphs or embedded newlines.  Note
    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
    // when skipping.
    char DirectiveBuf[20];
    StringRef Directive;
    if (!Tok.needsCleaning() && RI.size() < 20) {
      Directive = RI;
    } else {
      std::string DirectiveStr = getSpelling(Tok);
      size_t IdLen = DirectiveStr.size();
      // A spelling of 20 or more characters is skipped without further
      // inspection (it would not fit in DirectiveBuf).
      if (IdLen >= 20) {
        CurPPLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
        if (CurLexer) CurLexer->resetExtendedTokenMode();
        continue;
      }
      // Copy into DirectiveBuf so that Directive does not refer to
      // DirectiveStr, which goes out of scope at the end of this branch.
      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
      Directive = StringRef(DirectiveBuf, IdLen);
    }

    if (Directive.starts_with("if")) {
      StringRef Sub = Directive.substr(2);
      if (Sub.empty() ||   // "if"
          Sub == "def" ||   // "ifdef"
          Sub == "ndef") {  // "ifndef"
        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
        // bother parsing the condition.
        DiscardUntilEndOfDirective();
        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
                                       /*foundnonskip*/false,
                                       /*foundelse*/false);
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else if (Directive[0] == 'e') {
      StringRef Sub = Directive.substr(1);
      if (Sub == "ndif") {  // "endif"
        PPConditionalInfo CondInfo;
        CondInfo.WasSkipping = true; // Silence bogus warning.
        bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
        (void)InCond;  // Silence warning in no-asserts mode.
        assert(!InCond && "Can't be skipping if not in a conditional!");

        // If we popped the outermost skipping block, we're done skipping!
        if (!CondInfo.WasSkipping) {
          SkippingRangeState.endLexPass(Hashptr);
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly, if we've reached the outermost block.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("endif");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();
        }
      } else if (Sub == "lse") { // "else".
        // #else directive in a skipping conditional.  If not in some other
        // skipping conditional, and if #else hasn't already been seen, enter it
        // as a non-skipping conditional.
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #else with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_else_after_else);

        // Note that we've seen a #else in this conditional.
        CondInfo.FoundElse = true;

        // If the conditional is at the top level, and the #if block wasn't
        // entered, enter the #else block now.
        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
          CondInfo.FoundNonSkip = true;
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("else");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();  // C99 6.10p4.
        }
      } else if (Sub == "lif") {  // "elif".
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;

        // If this is in a skipping block or if we're already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          IdentifierInfo *IfNDefMacro = nullptr;
          DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
          // Stop if Lexer became invalid after hitting code completion token.
          if (!CurPPLexer)
            return;
          const bool CondValue = DER.Conditional;
          CurPPLexer->LexingRawMode = true;
          if (Callbacks) {
            Callbacks->Elif(
                Tok.getLocation(), DER.ExprRange,
                (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
                CondInfo.IfLoc);
          }
          // If this condition is true, enter it!
          if (CondValue) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else if (Sub == "lifdef" ||  // "elifdef"
                 Sub == "lifndef") { // "elifndef"
        bool IsElifDef = Sub == "lifdef";
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
        // Keep the directive token: Tok's location is still needed for the
        // callbacks after the macro name has been read.
        Token DirectiveToken = Tok;

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
        // if this branch is in a skipping block.
        unsigned DiagID;
        if (LangOpts.CPlusPlus)
          DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
                                        : diag::ext_cxx23_pp_directive;
        else
          DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
                                : diag::ext_c23_pp_directive;
        Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else)
              << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is in a skipping block or if we're already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif[n]def expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          Token MacroNameTok;
          ReadMacroName(MacroNameTok);
          CurPPLexer->LexingRawMode = true;

          // If the macro name token is tok::eod, there was an error that was
          // already reported.
          if (MacroNameTok.is(tok::eod)) {
            // Skip code until we get to #endif.  This helps with recovery by
            // not emitting an error when the #endif is reached.
            continue;
          }

          emitMacroExpansionWarnings(MacroNameTok);

          CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");

          IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
          auto MD = getMacroDefinition(MII);
          MacroInfo *MI = MD.getMacroInfo();

          if (Callbacks) {
            if (IsElifDef) {
              Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
                                 MD);
            } else {
              Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
                                  MD);
            }
          }
          // If this condition is true, enter it!
          // (#elifdef is true when the macro has a MacroInfo, #elifndef when
          // it does not.)
          if (static_cast<bool>(MI) == IsElifDef) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else {
      SuggestTypoedDirective(Tok, Directive);
    }

    CurPPLexer->ParsingPreprocessorDirective = false;
    // Restore comment saving mode.
    if (CurLexer) CurLexer->resetExtendedTokenMode();
  }

  // Finally, if we are out of the conditional (saw an #endif or ran off the end
  // of the file, just stop skipping and return to lexing whatever came after
  // the #if block.
  CurPPLexer->LexingRawMode = false;

  // The last skipped range isn't actually skipped yet if it's truncated
  // by the end of the preamble; we'll resume parsing after the preamble.
  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
    Callbacks->SourceRangeSkipped(
        SourceRange(HashTokenLoc, endLoc.isValid()
                                      ? endLoc
                                      : CurPPLexer->getSourceLocation()),
        Tok.getLocation());
}
858
getModuleForLocation(SourceLocation Loc,bool AllowTextual)859 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
860 bool AllowTextual) {
861 if (!SourceMgr.isInMainFile(Loc)) {
862 // Try to determine the module of the include directive.
863 // FIXME: Look into directly passing the FileEntry from LookupFile instead.
864 FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
865 if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
866 // The include comes from an included file.
867 return HeaderInfo.getModuleMap()
868 .findModuleForHeader(*EntryOfIncl, AllowTextual)
869 .getModule();
870 }
871 }
872
873 // This is either in the main file or not in a file at all. It belongs
874 // to the current module, if there is one.
875 return getLangOpts().CurrentModule.empty()
876 ? nullptr
877 : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
878 }
879
880 OptionalFileEntryRef
getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,SourceLocation Loc)881 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
882 SourceLocation Loc) {
883 Module *IncM = getModuleForLocation(
884 IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
885
886 // Walk up through the include stack, looking through textual headers of M
887 // until we hit a non-textual header that we can #include. (We assume textual
888 // headers of a module with non-textual headers aren't meant to be used to
889 // import entities from the module.)
890 auto &SM = getSourceManager();
891 while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
892 auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
893 auto FE = SM.getFileEntryRefForID(ID);
894 if (!FE)
895 break;
896
897 // We want to find all possible modules that might contain this header, so
898 // search all enclosing directories for module maps and load them.
899 HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
900 SourceMgr.isInSystemHeader(Loc));
901
902 bool InPrivateHeader = false;
903 for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
904 if (!Header.isAccessibleFrom(IncM)) {
905 // It's in a private header; we can't #include it.
906 // FIXME: If there's a public header in some module that re-exports it,
907 // then we could suggest including that, but it's not clear that's the
908 // expected way to make this entity visible.
909 InPrivateHeader = true;
910 continue;
911 }
912
913 // Don't suggest explicitly excluded headers.
914 if (Header.getRole() == ModuleMap::ExcludedHeader)
915 continue;
916
917 // We'll suggest including textual headers below if they're
918 // include-guarded.
919 if (Header.getRole() & ModuleMap::TextualHeader)
920 continue;
921
922 // If we have a module import syntax, we shouldn't include a header to
923 // make a particular module visible. Let the caller know they should
924 // suggest an import instead.
925 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
926 return std::nullopt;
927
928 // If this is an accessible, non-textual header of M's top-level module
929 // that transitively includes the given location and makes the
930 // corresponding module visible, this is the thing to #include.
931 return *FE;
932 }
933
934 // FIXME: If we're bailing out due to a private header, we shouldn't suggest
935 // an import either.
936 if (InPrivateHeader)
937 return std::nullopt;
938
939 // If the header is includable and has an include guard, assume the
940 // intended way to expose its contents is by #include, not by importing a
941 // module that transitively includes it.
942 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
943 return *FE;
944
945 Loc = SM.getIncludeLoc(ID);
946 }
947
948 return std::nullopt;
949 }
950
LookupFile(SourceLocation FilenameLoc,StringRef Filename,bool isAngled,ConstSearchDirIterator FromDir,const FileEntry * FromFile,ConstSearchDirIterator * CurDirArg,SmallVectorImpl<char> * SearchPath,SmallVectorImpl<char> * RelativePath,ModuleMap::KnownHeader * SuggestedModule,bool * IsMapped,bool * IsFrameworkFound,bool SkipCache,bool OpenFile,bool CacheFailures)951 OptionalFileEntryRef Preprocessor::LookupFile(
952 SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
953 ConstSearchDirIterator FromDir, const FileEntry *FromFile,
954 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
955 SmallVectorImpl<char> *RelativePath,
956 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
957 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
958 ConstSearchDirIterator CurDirLocal = nullptr;
959 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
960
961 Module *RequestingModule = getModuleForLocation(
962 FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
963
964 // If the header lookup mechanism may be relative to the current inclusion
965 // stack, record the parent #includes.
966 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
967 bool BuildSystemModule = false;
968 if (!FromDir && !FromFile) {
969 FileID FID = getCurrentFileLexer()->getFileID();
970 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
971
972 // If there is no file entry associated with this file, it must be the
973 // predefines buffer or the module includes buffer. Any other file is not
974 // lexed with a normal lexer, so it won't be scanned for preprocessor
975 // directives.
976 //
977 // If we have the predefines buffer, resolve #include references (which come
978 // from the -include command line argument) from the current working
979 // directory instead of relative to the main file.
980 //
981 // If we have the module includes buffer, resolve #include references (which
982 // come from header declarations in the module map) relative to the module
983 // map file.
984 if (!FileEnt) {
985 if (FID == SourceMgr.getMainFileID() && MainFileDir) {
986 auto IncludeDir =
987 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
988 Filename, getCurrentModule())
989 ? HeaderInfo.getModuleMap().getBuiltinDir()
990 : MainFileDir;
991 Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
992 BuildSystemModule = getCurrentModule()->IsSystem;
993 } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
994 SourceMgr.getMainFileID()))) {
995 auto CWD = FileMgr.getOptionalDirectoryRef(".");
996 Includers.push_back(std::make_pair(*FileEnt, *CWD));
997 }
998 } else {
999 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1000 }
1001
1002 // MSVC searches the current include stack from top to bottom for
1003 // headers included by quoted include directives.
1004 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1005 if (LangOpts.MSVCCompat && !isAngled) {
1006 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1007 if (IsFileLexer(ISEntry))
1008 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1009 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1010 }
1011 }
1012 }
1013
1014 CurDir = CurDirLookup;
1015
1016 if (FromFile) {
1017 // We're supposed to start looking from after a particular file. Search
1018 // the include path until we find that file or run out of files.
1019 ConstSearchDirIterator TmpCurDir = CurDir;
1020 ConstSearchDirIterator TmpFromDir = nullptr;
1021 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1022 Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1023 Includers, SearchPath, RelativePath, RequestingModule,
1024 SuggestedModule, /*IsMapped=*/nullptr,
1025 /*IsFrameworkFound=*/nullptr, SkipCache)) {
1026 // Keep looking as if this file did a #include_next.
1027 TmpFromDir = TmpCurDir;
1028 ++TmpFromDir;
1029 if (&FE->getFileEntry() == FromFile) {
1030 // Found it.
1031 FromDir = TmpFromDir;
1032 CurDir = TmpCurDir;
1033 break;
1034 }
1035 }
1036 }
1037
1038 // Do a standard file entry lookup.
1039 OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1040 Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1041 RelativePath, RequestingModule, SuggestedModule, IsMapped,
1042 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1043 if (FE)
1044 return FE;
1045
1046 OptionalFileEntryRef CurFileEnt;
1047 // Otherwise, see if this is a subframework header. If so, this is relative
1048 // to one of the headers on the #include stack. Walk the list of the current
1049 // headers on the #include stack and pass them to HeaderInfo.
1050 if (IsFileLexer()) {
1051 if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1052 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1053 Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1054 SuggestedModule)) {
1055 return FE;
1056 }
1057 }
1058 }
1059
1060 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1061 if (IsFileLexer(ISEntry)) {
1062 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1063 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1064 Filename, *CurFileEnt, SearchPath, RelativePath,
1065 RequestingModule, SuggestedModule)) {
1066 return FE;
1067 }
1068 }
1069 }
1070 }
1071
1072 // Otherwise, we really couldn't find the file.
1073 return std::nullopt;
1074 }
1075
1076 //===----------------------------------------------------------------------===//
1077 // Preprocessor Directive Handling.
1078 //===----------------------------------------------------------------------===//
1079
1080 class Preprocessor::ResetMacroExpansionHelper {
1081 public:
ResetMacroExpansionHelper(Preprocessor * pp)1082 ResetMacroExpansionHelper(Preprocessor *pp)
1083 : PP(pp), save(pp->DisableMacroExpansion) {
1084 if (pp->MacroExpansionInDirectivesOverride)
1085 pp->DisableMacroExpansion = false;
1086 }
1087
~ResetMacroExpansionHelper()1088 ~ResetMacroExpansionHelper() {
1089 PP->DisableMacroExpansion = save;
1090 }
1091
1092 private:
1093 Preprocessor *PP;
1094 bool save;
1095 };
1096
1097 /// Process a directive while looking for the through header or a #pragma
1098 /// hdrstop. The following directives are handled:
1099 /// #include (to check if it is the through header)
1100 /// #define (to warn about macros that don't match the PCH)
1101 /// #pragma (to check for pragma hdrstop).
1102 /// All other directives are completely discarded.
HandleSkippedDirectiveWhileUsingPCH(Token & Result,SourceLocation HashLoc)1103 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1104 SourceLocation HashLoc) {
1105 if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1106 if (II->getPPKeywordID() == tok::pp_define) {
1107 return HandleDefineDirective(Result,
1108 /*ImmediatelyAfterHeaderGuard=*/false);
1109 }
1110 if (SkippingUntilPCHThroughHeader &&
1111 II->getPPKeywordID() == tok::pp_include) {
1112 return HandleIncludeDirective(HashLoc, Result);
1113 }
1114 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1115 Lex(Result);
1116 auto *II = Result.getIdentifierInfo();
1117 if (II && II->getName() == "hdrstop")
1118 return HandlePragmaHdrstop(Result);
1119 }
1120 }
1121 DiscardUntilEndOfDirective();
1122 }
1123
1124 /// HandleDirective - This callback is invoked when the lexer sees a # token
1125 /// at the start of a line. This consumes the directive, modifies the
1126 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1127 /// read is the correct one.
HandleDirective(Token & Result)1128 void Preprocessor::HandleDirective(Token &Result) {
1129 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1130
1131 // We just parsed a # character at the start of a line, so we're in directive
1132 // mode. Tell the lexer this so any newlines we see will be converted into an
1133 // EOD token (which terminates the directive).
1134 CurPPLexer->ParsingPreprocessorDirective = true;
1135 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1136
1137 bool ImmediatelyAfterTopLevelIfndef =
1138 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1139 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1140
1141 ++NumDirectives;
1142
1143 // We are about to read a token. For the multiple-include optimization FA to
1144 // work, we have to remember if we had read any tokens *before* this
1145 // pp-directive.
1146 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1147
1148 // Save the '#' token in case we need to return it later.
1149 Token SavedHash = Result;
1150
1151 // Read the next token, the directive flavor. This isn't expanded due to
1152 // C99 6.10.3p8.
1153 LexUnexpandedToken(Result);
1154
1155 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1156 // #define A(x) #x
1157 // A(abc
1158 // #warning blah
1159 // def)
1160 // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1161 // not support this for #include-like directives, since that can result in
1162 // terrible diagnostics, and does not work in GCC.
1163 if (InMacroArgs) {
1164 if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1165 switch (II->getPPKeywordID()) {
1166 case tok::pp_include:
1167 case tok::pp_import:
1168 case tok::pp_include_next:
1169 case tok::pp___include_macros:
1170 case tok::pp_pragma:
1171 Diag(Result, diag::err_embedded_directive) << II->getName();
1172 Diag(*ArgMacro, diag::note_macro_expansion_here)
1173 << ArgMacro->getIdentifierInfo();
1174 DiscardUntilEndOfDirective();
1175 return;
1176 default:
1177 break;
1178 }
1179 }
1180 Diag(Result, diag::ext_embedded_directive);
1181 }
1182
1183 // Temporarily enable macro expansion if set so
1184 // and reset to previous state when returning from this function.
1185 ResetMacroExpansionHelper helper(this);
1186
1187 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1188 return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1189
1190 switch (Result.getKind()) {
1191 case tok::eod:
1192 // Ignore the null directive with regards to the multiple-include
1193 // optimization, i.e. allow the null directive to appear outside of the
1194 // include guard and still enable the multiple-include optimization.
1195 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1196 return; // null directive.
1197 case tok::code_completion:
1198 setCodeCompletionReached();
1199 if (CodeComplete)
1200 CodeComplete->CodeCompleteDirective(
1201 CurPPLexer->getConditionalStackDepth() > 0);
1202 return;
1203 case tok::numeric_constant: // # 7 GNU line marker directive.
1204 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1205 // directive. However do permit it in the predefines file, as we use line
1206 // markers to mark the builtin macros as being in a system header.
1207 if (getLangOpts().AsmPreprocessor &&
1208 SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1209 break;
1210 return HandleDigitDirective(Result);
1211 default:
1212 IdentifierInfo *II = Result.getIdentifierInfo();
1213 if (!II) break; // Not an identifier.
1214
1215 // Ask what the preprocessor keyword ID is.
1216 switch (II->getPPKeywordID()) {
1217 default: break;
1218 // C99 6.10.1 - Conditional Inclusion.
1219 case tok::pp_if:
1220 return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1221 case tok::pp_ifdef:
1222 return HandleIfdefDirective(Result, SavedHash, false,
1223 true /*not valid for miopt*/);
1224 case tok::pp_ifndef:
1225 return HandleIfdefDirective(Result, SavedHash, true,
1226 ReadAnyTokensBeforeDirective);
1227 case tok::pp_elif:
1228 case tok::pp_elifdef:
1229 case tok::pp_elifndef:
1230 return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1231
1232 case tok::pp_else:
1233 return HandleElseDirective(Result, SavedHash);
1234 case tok::pp_endif:
1235 return HandleEndifDirective(Result);
1236
1237 // C99 6.10.2 - Source File Inclusion.
1238 case tok::pp_include:
1239 // Handle #include.
1240 return HandleIncludeDirective(SavedHash.getLocation(), Result);
1241 case tok::pp___include_macros:
1242 // Handle -imacros.
1243 return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1244
1245 // C99 6.10.3 - Macro Replacement.
1246 case tok::pp_define:
1247 return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1248 case tok::pp_undef:
1249 return HandleUndefDirective();
1250
1251 // C99 6.10.4 - Line Control.
1252 case tok::pp_line:
1253 return HandleLineDirective();
1254
1255 // C99 6.10.5 - Error Directive.
1256 case tok::pp_error:
1257 return HandleUserDiagnosticDirective(Result, false);
1258
1259 // C99 6.10.6 - Pragma Directive.
1260 case tok::pp_pragma:
1261 return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1262
1263 // GNU Extensions.
1264 case tok::pp_import:
1265 return HandleImportDirective(SavedHash.getLocation(), Result);
1266 case tok::pp_include_next:
1267 return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1268
1269 case tok::pp_warning:
1270 if (LangOpts.CPlusPlus)
1271 Diag(Result, LangOpts.CPlusPlus23
1272 ? diag::warn_cxx23_compat_warning_directive
1273 : diag::ext_pp_warning_directive)
1274 << /*C++23*/ 1;
1275 else
1276 Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1277 : diag::ext_pp_warning_directive)
1278 << /*C23*/ 0;
1279
1280 return HandleUserDiagnosticDirective(Result, true);
1281 case tok::pp_ident:
1282 return HandleIdentSCCSDirective(Result);
1283 case tok::pp_sccs:
1284 return HandleIdentSCCSDirective(Result);
1285 case tok::pp_assert:
1286 //isExtension = true; // FIXME: implement #assert
1287 break;
1288 case tok::pp_unassert:
1289 //isExtension = true; // FIXME: implement #unassert
1290 break;
1291
1292 case tok::pp___public_macro:
1293 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1294 return HandleMacroPublicDirective(Result);
1295 break;
1296
1297 case tok::pp___private_macro:
1298 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1299 return HandleMacroPrivateDirective();
1300 break;
1301 }
1302 break;
1303 }
1304
1305 // If this is a .S file, treat unknown # directives as non-preprocessor
1306 // directives. This is important because # may be a comment or introduce
1307 // various pseudo-ops. Just return the # token and push back the following
1308 // token to be lexed next time.
1309 if (getLangOpts().AsmPreprocessor) {
1310 auto Toks = std::make_unique<Token[]>(2);
1311 // Return the # and the token after it.
1312 Toks[0] = SavedHash;
1313 Toks[1] = Result;
1314
1315 // If the second token is a hashhash token, then we need to translate it to
1316 // unknown so the token lexer doesn't try to perform token pasting.
1317 if (Result.is(tok::hashhash))
1318 Toks[1].setKind(tok::unknown);
1319
1320 // Enter this token stream so that we re-lex the tokens. Make sure to
1321 // enable macro expansion, in case the token after the # is an identifier
1322 // that is expanded.
1323 EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1324 return;
1325 }
1326
1327 // If we reached here, the preprocessing token is not valid!
1328 // Start suggesting if a similar directive found.
1329 Diag(Result, diag::err_pp_invalid_directive) << 0;
1330
1331 // Read the rest of the PP line.
1332 DiscardUntilEndOfDirective();
1333
1334 // Okay, we're done parsing the directive.
1335 }
1336
1337 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1338 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
GetLineValue(Token & DigitTok,unsigned & Val,unsigned DiagID,Preprocessor & PP,bool IsGNULineDirective=false)1339 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1340 unsigned DiagID, Preprocessor &PP,
1341 bool IsGNULineDirective=false) {
1342 if (DigitTok.isNot(tok::numeric_constant)) {
1343 PP.Diag(DigitTok, DiagID);
1344
1345 if (DigitTok.isNot(tok::eod))
1346 PP.DiscardUntilEndOfDirective();
1347 return true;
1348 }
1349
1350 SmallString<64> IntegerBuffer;
1351 IntegerBuffer.resize(DigitTok.getLength());
1352 const char *DigitTokBegin = &IntegerBuffer[0];
1353 bool Invalid = false;
1354 unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1355 if (Invalid)
1356 return true;
1357
1358 // Verify that we have a simple digit-sequence, and compute the value. This
1359 // is always a simple digit string computed in decimal, so we do this manually
1360 // here.
1361 Val = 0;
1362 for (unsigned i = 0; i != ActualLength; ++i) {
1363 // C++1y [lex.fcon]p1:
1364 // Optional separating single quotes in a digit-sequence are ignored
1365 if (DigitTokBegin[i] == '\'')
1366 continue;
1367
1368 if (!isDigit(DigitTokBegin[i])) {
1369 PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1370 diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1371 PP.DiscardUntilEndOfDirective();
1372 return true;
1373 }
1374
1375 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1376 if (NextVal < Val) { // overflow.
1377 PP.Diag(DigitTok, DiagID);
1378 PP.DiscardUntilEndOfDirective();
1379 return true;
1380 }
1381 Val = NextVal;
1382 }
1383
1384 if (DigitTokBegin[0] == '0' && Val)
1385 PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1386 << IsGNULineDirective;
1387
1388 return false;
1389 }
1390
1391 /// Handle a \#line directive: C99 6.10.4.
1392 ///
1393 /// The two acceptable forms are:
1394 /// \verbatim
1395 /// # line digit-sequence
1396 /// # line digit-sequence "s-char-sequence"
1397 /// \endverbatim
HandleLineDirective()1398 void Preprocessor::HandleLineDirective() {
1399 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are
1400 // expanded.
1401 Token DigitTok;
1402 Lex(DigitTok);
1403
1404 // Validate the number and convert it to an unsigned.
1405 unsigned LineNo;
1406 if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1407 return;
1408
1409 if (LineNo == 0)
1410 Diag(DigitTok, diag::ext_pp_line_zero);
1411
1412 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1413 // number greater than 2147483647". C90 requires that the line # be <= 32767.
1414 unsigned LineLimit = 32768U;
1415 if (LangOpts.C99 || LangOpts.CPlusPlus11)
1416 LineLimit = 2147483648U;
1417 if (LineNo >= LineLimit)
1418 Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1419 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1420 Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1421
1422 int FilenameID = -1;
1423 Token StrTok;
1424 Lex(StrTok);
1425
1426 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1427 // string followed by eod.
1428 if (StrTok.is(tok::eod))
1429 ; // ok
1430 else if (StrTok.isNot(tok::string_literal)) {
1431 Diag(StrTok, diag::err_pp_line_invalid_filename);
1432 DiscardUntilEndOfDirective();
1433 return;
1434 } else if (StrTok.hasUDSuffix()) {
1435 Diag(StrTok, diag::err_invalid_string_udl);
1436 DiscardUntilEndOfDirective();
1437 return;
1438 } else {
1439 // Parse and validate the string, converting it into a unique ID.
1440 StringLiteralParser Literal(StrTok, *this);
1441 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1442 if (Literal.hadError) {
1443 DiscardUntilEndOfDirective();
1444 return;
1445 }
1446 if (Literal.Pascal) {
1447 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1448 DiscardUntilEndOfDirective();
1449 return;
1450 }
1451 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1452
1453 // Verify that there is nothing after the string, other than EOD. Because
1454 // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1455 CheckEndOfDirective("line", true);
1456 }
1457
1458 // Take the file kind of the file containing the #line directive. #line
1459 // directives are often used for generated sources from the same codebase, so
1460 // the new file should generally be classified the same way as the current
1461 // file. This is visible in GCC's pre-processed output, which rewrites #line
1462 // to GNU line markers.
1463 SrcMgr::CharacteristicKind FileKind =
1464 SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1465
1466 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1467 false, FileKind);
1468
1469 if (Callbacks)
1470 Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1471 PPCallbacks::RenameFile, FileKind);
1472 }
1473
1474 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1475 /// marker directive.
ReadLineMarkerFlags(bool & IsFileEntry,bool & IsFileExit,SrcMgr::CharacteristicKind & FileKind,Preprocessor & PP)1476 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1477 SrcMgr::CharacteristicKind &FileKind,
1478 Preprocessor &PP) {
1479 unsigned FlagVal;
1480 Token FlagTok;
1481 PP.Lex(FlagTok);
1482 if (FlagTok.is(tok::eod)) return false;
1483 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1484 return true;
1485
1486 if (FlagVal == 1) {
1487 IsFileEntry = true;
1488
1489 PP.Lex(FlagTok);
1490 if (FlagTok.is(tok::eod)) return false;
1491 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1492 return true;
1493 } else if (FlagVal == 2) {
1494 IsFileExit = true;
1495
1496 SourceManager &SM = PP.getSourceManager();
1497 // If we are leaving the current presumed file, check to make sure the
1498 // presumed include stack isn't empty!
1499 FileID CurFileID =
1500 SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1501 PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1502 if (PLoc.isInvalid())
1503 return true;
1504
1505 // If there is no include loc (main file) or if the include loc is in a
1506 // different physical file, then we aren't in a "1" line marker flag region.
1507 SourceLocation IncLoc = PLoc.getIncludeLoc();
1508 if (IncLoc.isInvalid() ||
1509 SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1510 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1511 PP.DiscardUntilEndOfDirective();
1512 return true;
1513 }
1514
1515 PP.Lex(FlagTok);
1516 if (FlagTok.is(tok::eod)) return false;
1517 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1518 return true;
1519 }
1520
1521 // We must have 3 if there are still flags.
1522 if (FlagVal != 3) {
1523 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1524 PP.DiscardUntilEndOfDirective();
1525 return true;
1526 }
1527
1528 FileKind = SrcMgr::C_System;
1529
1530 PP.Lex(FlagTok);
1531 if (FlagTok.is(tok::eod)) return false;
1532 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1533 return true;
1534
1535 // We must have 4 if there is yet another flag.
1536 if (FlagVal != 4) {
1537 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1538 PP.DiscardUntilEndOfDirective();
1539 return true;
1540 }
1541
1542 FileKind = SrcMgr::C_ExternCSystem;
1543
1544 PP.Lex(FlagTok);
1545 if (FlagTok.is(tok::eod)) return false;
1546
1547 // There are no more valid flags here.
1548 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1549 PP.DiscardUntilEndOfDirective();
1550 return true;
1551 }
1552
1553 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1554 /// one of the following forms:
1555 ///
1556 /// # 42
1557 /// # 42 "file" ('1' | '2')?
1558 /// # 42 "file" ('1' | '2')? '3' '4'?
1559 ///
HandleDigitDirective(Token & DigitTok)1560 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1561 // Validate the number and convert it to an unsigned. GNU does not have a
1562 // line # limit other than it fit in 32-bits.
1563 unsigned LineNo;
1564 if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1565 *this, true))
1566 return;
1567
1568 Token StrTok;
1569 Lex(StrTok);
1570
1571 bool IsFileEntry = false, IsFileExit = false;
1572 int FilenameID = -1;
1573 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1574
1575 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a
1576 // string followed by eod.
1577 if (StrTok.is(tok::eod)) {
1578 Diag(StrTok, diag::ext_pp_gnu_line_directive);
1579 // Treat this like "#line NN", which doesn't change file characteristics.
1580 FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1581 } else if (StrTok.isNot(tok::string_literal)) {
1582 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1583 DiscardUntilEndOfDirective();
1584 return;
1585 } else if (StrTok.hasUDSuffix()) {
1586 Diag(StrTok, diag::err_invalid_string_udl);
1587 DiscardUntilEndOfDirective();
1588 return;
1589 } else {
1590 // Parse and validate the string, converting it into a unique ID.
1591 StringLiteralParser Literal(StrTok, *this);
1592 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1593 if (Literal.hadError) {
1594 DiscardUntilEndOfDirective();
1595 return;
1596 }
1597 if (Literal.Pascal) {
1598 Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1599 DiscardUntilEndOfDirective();
1600 return;
1601 }
1602
1603 // If a filename was present, read any flags that are present.
1604 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1605 return;
1606 if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1607 !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1608 Diag(StrTok, diag::ext_pp_gnu_line_directive);
1609
1610 // Exiting to an empty string means pop to the including file, so leave
1611 // FilenameID as -1 in that case.
1612 if (!(IsFileExit && Literal.GetString().empty()))
1613 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1614 }
1615
1616 // Create a line note with this information.
1617 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1618 IsFileExit, FileKind);
1619
1620 // If the preprocessor has callbacks installed, notify them of the #line
1621 // change. This is used so that the line marker comes out in -E mode for
1622 // example.
1623 if (Callbacks) {
1624 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1625 if (IsFileEntry)
1626 Reason = PPCallbacks::EnterFile;
1627 else if (IsFileExit)
1628 Reason = PPCallbacks::ExitFile;
1629
1630 Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1631 }
1632 }
1633
1634 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1635 ///
HandleUserDiagnosticDirective(Token & Tok,bool isWarning)1636 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1637 bool isWarning) {
1638 // Read the rest of the line raw. We do this because we don't want macros
1639 // to be expanded and we don't require that the tokens be valid preprocessing
1640 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1641 // collapse multiple consecutive white space between tokens, but this isn't
1642 // specified by the standard.
1643 SmallString<128> Message;
1644 CurLexer->ReadToEndOfLine(&Message);
1645
1646 // Find the first non-whitespace character, so that we can make the
1647 // diagnostic more succinct.
1648 StringRef Msg = Message.str().ltrim(' ');
1649
1650 if (isWarning)
1651 Diag(Tok, diag::pp_hash_warning) << Msg;
1652 else
1653 Diag(Tok, diag::err_pp_hash_error) << Msg;
1654 }
1655
1656 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1657 ///
HandleIdentSCCSDirective(Token & Tok)1658 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1659 // Yes, this directive is an extension.
1660 Diag(Tok, diag::ext_pp_ident_directive);
1661
1662 // Read the string argument.
1663 Token StrTok;
1664 Lex(StrTok);
1665
1666 // If the token kind isn't a string, it's a malformed directive.
1667 if (StrTok.isNot(tok::string_literal) &&
1668 StrTok.isNot(tok::wide_string_literal)) {
1669 Diag(StrTok, diag::err_pp_malformed_ident);
1670 if (StrTok.isNot(tok::eod))
1671 DiscardUntilEndOfDirective();
1672 return;
1673 }
1674
1675 if (StrTok.hasUDSuffix()) {
1676 Diag(StrTok, diag::err_invalid_string_udl);
1677 DiscardUntilEndOfDirective();
1678 return;
1679 }
1680
1681 // Verify that there is nothing after the string, other than EOD.
1682 CheckEndOfDirective("ident");
1683
1684 if (Callbacks) {
1685 bool Invalid = false;
1686 std::string Str = getSpelling(StrTok, &Invalid);
1687 if (!Invalid)
1688 Callbacks->Ident(Tok.getLocation(), Str);
1689 }
1690 }
1691
1692 /// Handle a #public directive.
HandleMacroPublicDirective(Token & Tok)1693 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1694 Token MacroNameTok;
1695 ReadMacroName(MacroNameTok, MU_Undef);
1696
1697 // Error reading macro name? If so, diagnostic already issued.
1698 if (MacroNameTok.is(tok::eod))
1699 return;
1700
1701 // Check to see if this is the last token on the #__public_macro line.
1702 CheckEndOfDirective("__public_macro");
1703
1704 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1705 // Okay, we finally have a valid identifier to undef.
1706 MacroDirective *MD = getLocalMacroDirective(II);
1707
1708 // If the macro is not defined, this is an error.
1709 if (!MD) {
1710 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1711 return;
1712 }
1713
1714 // Note that this macro has now been exported.
1715 appendMacroDirective(II, AllocateVisibilityMacroDirective(
1716 MacroNameTok.getLocation(), /*isPublic=*/true));
1717 }
1718
1719 /// Handle a #private directive.
HandleMacroPrivateDirective()1720 void Preprocessor::HandleMacroPrivateDirective() {
1721 Token MacroNameTok;
1722 ReadMacroName(MacroNameTok, MU_Undef);
1723
1724 // Error reading macro name? If so, diagnostic already issued.
1725 if (MacroNameTok.is(tok::eod))
1726 return;
1727
1728 // Check to see if this is the last token on the #__private_macro line.
1729 CheckEndOfDirective("__private_macro");
1730
1731 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1732 // Okay, we finally have a valid identifier to undef.
1733 MacroDirective *MD = getLocalMacroDirective(II);
1734
1735 // If the macro is not defined, this is an error.
1736 if (!MD) {
1737 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1738 return;
1739 }
1740
1741 // Note that this macro has now been marked private.
1742 appendMacroDirective(II, AllocateVisibilityMacroDirective(
1743 MacroNameTok.getLocation(), /*isPublic=*/false));
1744 }
1745
1746 //===----------------------------------------------------------------------===//
1747 // Preprocessor Include Directive Handling.
1748 //===----------------------------------------------------------------------===//
1749
1750 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1751 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1752 /// true if the input filename was in <>'s or false if it were in ""'s. The
1753 /// caller is expected to provide a buffer that is large enough to hold the
1754 /// spelling of the filename, but is also expected to handle the case when
1755 /// this method decides to use a different buffer.
GetIncludeFilenameSpelling(SourceLocation Loc,StringRef & Buffer)1756 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1757 StringRef &Buffer) {
1758 // Get the text form of the filename.
1759 assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1760
1761 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1762 // C++20 [lex.header]/2:
1763 //
1764 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1765 // in C: behavior is undefined
1766 // in C++: program is conditionally-supported with implementation-defined
1767 // semantics
1768
1769 // Make sure the filename is <x> or "x".
1770 bool isAngled;
1771 if (Buffer[0] == '<') {
1772 if (Buffer.back() != '>') {
1773 Diag(Loc, diag::err_pp_expects_filename);
1774 Buffer = StringRef();
1775 return true;
1776 }
1777 isAngled = true;
1778 } else if (Buffer[0] == '"') {
1779 if (Buffer.back() != '"') {
1780 Diag(Loc, diag::err_pp_expects_filename);
1781 Buffer = StringRef();
1782 return true;
1783 }
1784 isAngled = false;
1785 } else {
1786 Diag(Loc, diag::err_pp_expects_filename);
1787 Buffer = StringRef();
1788 return true;
1789 }
1790
1791 // Diagnose #include "" as invalid.
1792 if (Buffer.size() <= 2) {
1793 Diag(Loc, diag::err_pp_empty_filename);
1794 Buffer = StringRef();
1795 return true;
1796 }
1797
1798 // Skip the brackets.
1799 Buffer = Buffer.substr(1, Buffer.size()-2);
1800 return isAngled;
1801 }
1802
1803 /// Push a token onto the token stream containing an annotation.
EnterAnnotationToken(SourceRange Range,tok::TokenKind Kind,void * AnnotationVal)1804 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1805 tok::TokenKind Kind,
1806 void *AnnotationVal) {
1807 // FIXME: Produce this as the current token directly, rather than
1808 // allocating a new token for it.
1809 auto Tok = std::make_unique<Token[]>(1);
1810 Tok[0].startToken();
1811 Tok[0].setKind(Kind);
1812 Tok[0].setLocation(Range.getBegin());
1813 Tok[0].setAnnotationEndLoc(Range.getEnd());
1814 Tok[0].setAnnotationValue(AnnotationVal);
1815 EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1816 }
1817
1818 /// Produce a diagnostic informing the user that a #include or similar
1819 /// was implicitly treated as a module import.
diagnoseAutoModuleImport(Preprocessor & PP,SourceLocation HashLoc,Token & IncludeTok,ArrayRef<std::pair<IdentifierInfo *,SourceLocation>> Path,SourceLocation PathEnd)1820 static void diagnoseAutoModuleImport(
1821 Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1822 ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1823 SourceLocation PathEnd) {
1824 SmallString<128> PathString;
1825 for (size_t I = 0, N = Path.size(); I != N; ++I) {
1826 if (I)
1827 PathString += '.';
1828 PathString += Path[I].first->getName();
1829 }
1830
1831 int IncludeKind = 0;
1832 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1833 case tok::pp_include:
1834 IncludeKind = 0;
1835 break;
1836
1837 case tok::pp_import:
1838 IncludeKind = 1;
1839 break;
1840
1841 case tok::pp_include_next:
1842 IncludeKind = 2;
1843 break;
1844
1845 case tok::pp___include_macros:
1846 IncludeKind = 3;
1847 break;
1848
1849 default:
1850 llvm_unreachable("unknown include directive kind");
1851 }
1852
1853 PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1854 << IncludeKind << PathString;
1855 }
1856
1857 // Given a vector of path components and a string containing the real
1858 // path to the file, build a properly-cased replacement in the vector,
1859 // and return true if the replacement should be suggested.
trySimplifyPath(SmallVectorImpl<StringRef> & Components,StringRef RealPathName,llvm::sys::path::Style Separator)1860 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1861 StringRef RealPathName,
1862 llvm::sys::path::Style Separator) {
1863 auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1864 auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1865 int Cnt = 0;
1866 bool SuggestReplacement = false;
1867
1868 auto IsSep = [Separator](StringRef Component) {
1869 return Component.size() == 1 &&
1870 llvm::sys::path::is_separator(Component[0], Separator);
1871 };
1872
1873 // Below is a best-effort to handle ".." in paths. It is admittedly
1874 // not 100% correct in the presence of symlinks.
1875 for (auto &Component : llvm::reverse(Components)) {
1876 if ("." == Component) {
1877 } else if (".." == Component) {
1878 ++Cnt;
1879 } else if (Cnt) {
1880 --Cnt;
1881 } else if (RealPathComponentIter != RealPathComponentEnd) {
1882 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1883 Component != *RealPathComponentIter) {
1884 // If these non-separator path components differ by more than just case,
1885 // then we may be looking at symlinked paths. Bail on this diagnostic to
1886 // avoid noisy false positives.
1887 SuggestReplacement =
1888 RealPathComponentIter->equals_insensitive(Component);
1889 if (!SuggestReplacement)
1890 break;
1891 Component = *RealPathComponentIter;
1892 }
1893 ++RealPathComponentIter;
1894 }
1895 }
1896 return SuggestReplacement;
1897 }
1898
checkModuleIsAvailable(const LangOptions & LangOpts,const TargetInfo & TargetInfo,const Module & M,DiagnosticsEngine & Diags)1899 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1900 const TargetInfo &TargetInfo,
1901 const Module &M,
1902 DiagnosticsEngine &Diags) {
1903 Module::Requirement Requirement;
1904 Module::UnresolvedHeaderDirective MissingHeader;
1905 Module *ShadowingModule = nullptr;
1906 if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1907 ShadowingModule))
1908 return false;
1909
1910 if (MissingHeader.FileNameLoc.isValid()) {
1911 Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1912 << MissingHeader.IsUmbrella << MissingHeader.FileName;
1913 } else if (ShadowingModule) {
1914 Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
1915 Diags.Report(ShadowingModule->DefinitionLoc,
1916 diag::note_previous_definition);
1917 } else {
1918 // FIXME: Track the location at which the requirement was specified, and
1919 // use it here.
1920 Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
1921 << M.getFullModuleName() << Requirement.second << Requirement.first;
1922 }
1923 return true;
1924 }
1925
1926 std::pair<ConstSearchDirIterator, const FileEntry *>
getIncludeNextStart(const Token & IncludeNextTok) const1927 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
1928 // #include_next is like #include, except that we start searching after
1929 // the current found directory. If we can't do this, issue a
1930 // diagnostic.
1931 ConstSearchDirIterator Lookup = CurDirLookup;
1932 const FileEntry *LookupFromFile = nullptr;
1933
1934 if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
1935 // If the main file is a header, then it's either for PCH/AST generation,
1936 // or libclang opened it. Either way, handle it as a normal include below
1937 // and do not complain about include_next.
1938 } else if (isInPrimaryFile()) {
1939 Lookup = nullptr;
1940 Diag(IncludeNextTok, diag::pp_include_next_in_primary);
1941 } else if (CurLexerSubmodule) {
1942 // Start looking up in the directory *after* the one in which the current
1943 // file would be found, if any.
1944 assert(CurPPLexer && "#include_next directive in macro?");
1945 if (auto FE = CurPPLexer->getFileEntry())
1946 LookupFromFile = *FE;
1947 Lookup = nullptr;
1948 } else if (!Lookup) {
1949 // The current file was not found by walking the include path. Either it
1950 // is the primary file (handled above), or it was found by absolute path,
1951 // or it was found relative to such a file.
1952 // FIXME: Track enough information so we know which case we're in.
1953 Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
1954 } else {
1955 // Start looking up in the next directory.
1956 ++Lookup;
1957 }
1958
1959 return {Lookup, LookupFromFile};
1960 }
1961
1962 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
1963 /// the file to be included from the lexer, then include it! This is a common
1964 /// routine with functionality shared between \#include, \#include_next and
1965 /// \#import. LookupFrom is set when this is a \#include_next directive, it
1966 /// specifies the file to start searching from.
HandleIncludeDirective(SourceLocation HashLoc,Token & IncludeTok,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)1967 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
1968 Token &IncludeTok,
1969 ConstSearchDirIterator LookupFrom,
1970 const FileEntry *LookupFromFile) {
1971 Token FilenameTok;
1972 if (LexHeaderName(FilenameTok))
1973 return;
1974
1975 if (FilenameTok.isNot(tok::header_name)) {
1976 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
1977 if (FilenameTok.isNot(tok::eod))
1978 DiscardUntilEndOfDirective();
1979 return;
1980 }
1981
1982 // Verify that there is nothing after the filename, other than EOD. Note
1983 // that we allow macros that expand to nothing after the filename, because
1984 // this falls into the category of "#include pp-tokens new-line" specified
1985 // in C99 6.10.2p4.
1986 SourceLocation EndLoc =
1987 CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
1988
1989 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
1990 EndLoc, LookupFrom, LookupFromFile);
1991 switch (Action.Kind) {
1992 case ImportAction::None:
1993 case ImportAction::SkippedModuleImport:
1994 break;
1995 case ImportAction::ModuleBegin:
1996 EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
1997 tok::annot_module_begin, Action.ModuleForHeader);
1998 break;
1999 case ImportAction::HeaderUnitImport:
2000 EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2001 Action.ModuleForHeader);
2002 break;
2003 case ImportAction::ModuleImport:
2004 EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2005 tok::annot_module_include, Action.ModuleForHeader);
2006 break;
2007 case ImportAction::Failure:
2008 assert(TheModuleLoader.HadFatalFailure &&
2009 "This should be an early exit only to a fatal error");
2010 TheModuleLoader.HadFatalFailure = true;
2011 IncludeTok.setKind(tok::eof);
2012 CurLexer->cutOffLexing();
2013 return;
2014 }
2015 }
2016
LookupHeaderIncludeOrImport(ConstSearchDirIterator * CurDir,StringRef & Filename,SourceLocation FilenameLoc,CharSourceRange FilenameRange,const Token & FilenameTok,bool & IsFrameworkFound,bool IsImportDecl,bool & IsMapped,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile,StringRef & LookupFilename,SmallVectorImpl<char> & RelativePath,SmallVectorImpl<char> & SearchPath,ModuleMap::KnownHeader & SuggestedModule,bool isAngled)2017 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2018 ConstSearchDirIterator *CurDir, StringRef &Filename,
2019 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2020 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2021 bool &IsMapped, ConstSearchDirIterator LookupFrom,
2022 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2023 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2024 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2025 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2026 if (LangOpts.AsmPreprocessor)
2027 return;
2028
2029 Module *RequestingModule = getModuleForLocation(
2030 FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2031 bool RequestingModuleIsModuleInterface =
2032 !SourceMgr.isInMainFile(FilenameLoc);
2033
2034 HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2035 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2036 Filename, FE);
2037 };
2038
2039 OptionalFileEntryRef File = LookupFile(
2040 FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2041 Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2042 &SuggestedModule, &IsMapped, &IsFrameworkFound);
2043 if (File) {
2044 DiagnoseHeaderInclusion(*File);
2045 return File;
2046 }
2047
2048 // Give the clients a chance to silently skip this include.
2049 if (Callbacks && Callbacks->FileNotFound(Filename))
2050 return std::nullopt;
2051
2052 if (SuppressIncludeNotFoundError)
2053 return std::nullopt;
2054
2055 // If the file could not be located and it was included via angle
2056 // brackets, we can attempt a lookup as though it were a quoted path to
2057 // provide the user with a possible fixit.
2058 if (isAngled) {
2059 OptionalFileEntryRef File = LookupFile(
2060 FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2061 Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2062 &SuggestedModule, &IsMapped,
2063 /*IsFrameworkFound=*/nullptr);
2064 if (File) {
2065 DiagnoseHeaderInclusion(*File);
2066 Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2067 << Filename << IsImportDecl
2068 << FixItHint::CreateReplacement(FilenameRange,
2069 "\"" + Filename.str() + "\"");
2070 return File;
2071 }
2072 }
2073
2074 // Check for likely typos due to leading or trailing non-isAlphanumeric
2075 // characters
2076 StringRef OriginalFilename = Filename;
2077 if (LangOpts.SpellChecking) {
2078 // A heuristic to correct a typo file name by removing leading and
2079 // trailing non-isAlphanumeric characters.
2080 auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2081 Filename = Filename.drop_until(isAlphanumeric);
2082 while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2083 Filename = Filename.drop_back();
2084 }
2085 return Filename;
2086 };
2087 StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2088 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2089
2090 OptionalFileEntryRef File = LookupFile(
2091 FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2092 LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2093 Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2094 /*IsFrameworkFound=*/nullptr);
2095 if (File) {
2096 DiagnoseHeaderInclusion(*File);
2097 auto Hint =
2098 isAngled ? FixItHint::CreateReplacement(
2099 FilenameRange, "<" + TypoCorrectionName.str() + ">")
2100 : FixItHint::CreateReplacement(
2101 FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2102 Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2103 << OriginalFilename << TypoCorrectionName << Hint;
2104 // We found the file, so set the Filename to the name after typo
2105 // correction.
2106 Filename = TypoCorrectionName;
2107 LookupFilename = TypoCorrectionLookupName;
2108 return File;
2109 }
2110 }
2111
2112 // If the file is still not found, just go with the vanilla diagnostic
2113 assert(!File && "expected missing file");
2114 Diag(FilenameTok, diag::err_pp_file_not_found)
2115 << OriginalFilename << FilenameRange;
2116 if (IsFrameworkFound) {
2117 size_t SlashPos = OriginalFilename.find('/');
2118 assert(SlashPos != StringRef::npos &&
2119 "Include with framework name should have '/' in the filename");
2120 StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2121 FrameworkCacheEntry &CacheEntry =
2122 HeaderInfo.LookupFrameworkCache(FrameworkName);
2123 assert(CacheEntry.Directory && "Found framework should be in cache");
2124 Diag(FilenameTok, diag::note_pp_framework_without_header)
2125 << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2126 << CacheEntry.Directory->getName();
2127 }
2128
2129 return std::nullopt;
2130 }
2131
2132 /// Handle either a #include-like directive or an import declaration that names
2133 /// a header file.
2134 ///
2135 /// \param HashLoc The location of the '#' token for an include, or
2136 /// SourceLocation() for an import declaration.
2137 /// \param IncludeTok The include / include_next / import token.
2138 /// \param FilenameTok The header-name token.
2139 /// \param EndLoc The location at which any imported macros become visible.
2140 /// \param LookupFrom For #include_next, the starting directory for the
2141 /// directory lookup.
2142 /// \param LookupFromFile For #include_next, the starting file for the directory
2143 /// lookup.
HandleHeaderIncludeOrImport(SourceLocation HashLoc,Token & IncludeTok,Token & FilenameTok,SourceLocation EndLoc,ConstSearchDirIterator LookupFrom,const FileEntry * LookupFromFile)2144 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2145 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2146 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2147 const FileEntry *LookupFromFile) {
2148 SmallString<128> FilenameBuffer;
2149 StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2150 SourceLocation CharEnd = FilenameTok.getEndLoc();
2151
2152 CharSourceRange FilenameRange
2153 = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
2154 StringRef OriginalFilename = Filename;
2155 bool isAngled =
2156 GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2157
2158 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2159 // error.
2160 if (Filename.empty())
2161 return {ImportAction::None};
2162
2163 bool IsImportDecl = HashLoc.isInvalid();
2164 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2165
2166 // Complain about attempts to #include files in an audit pragma.
2167 if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2168 Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2169 Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2170
2171 // Immediately leave the pragma.
2172 PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2173 }
2174
2175 // Complain about attempts to #include files in an assume-nonnull pragma.
2176 if (PragmaAssumeNonNullLoc.isValid()) {
2177 Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2178 Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2179
2180 // Immediately leave the pragma.
2181 PragmaAssumeNonNullLoc = SourceLocation();
2182 }
2183
2184 if (HeaderInfo.HasIncludeAliasMap()) {
2185 // Map the filename with the brackets still attached. If the name doesn't
2186 // map to anything, fall back on the filename we've already gotten the
2187 // spelling for.
2188 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2189 if (!NewName.empty())
2190 Filename = NewName;
2191 }
2192
2193 // Search include directories.
2194 bool IsMapped = false;
2195 bool IsFrameworkFound = false;
2196 ConstSearchDirIterator CurDir = nullptr;
2197 SmallString<1024> SearchPath;
2198 SmallString<1024> RelativePath;
2199 // We get the raw path only if we have 'Callbacks' to which we later pass
2200 // the path.
2201 ModuleMap::KnownHeader SuggestedModule;
2202 SourceLocation FilenameLoc = FilenameTok.getLocation();
2203 StringRef LookupFilename = Filename;
2204
2205 // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2206 // is unnecessary on Windows since the filesystem there handles backslashes.
2207 SmallString<128> NormalizedPath;
2208 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2209 if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2210 NormalizedPath = Filename.str();
2211 llvm::sys::path::native(NormalizedPath);
2212 LookupFilename = NormalizedPath;
2213 BackslashStyle = llvm::sys::path::Style::windows;
2214 }
2215
2216 OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2217 &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2218 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2219 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2220
2221 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2222 if (File && isPCHThroughHeader(&File->getFileEntry()))
2223 SkippingUntilPCHThroughHeader = false;
2224 return {ImportAction::None};
2225 }
2226
2227 // Should we enter the source file? Set to Skip if either the source file is
2228 // known to have no effect beyond its effect on module visibility -- that is,
2229 // if it's got an include guard that is already defined, set to Import if it
2230 // is a modular header we've already built and should import.
2231
2232 // For C++20 Modules
2233 // [cpp.include]/7 If the header identified by the header-name denotes an
2234 // importable header, it is implementation-defined whether the #include
2235 // preprocessing directive is instead replaced by an import directive.
2236 // For this implementation, the translation is permitted when we are parsing
2237 // the Global Module Fragment, and not otherwise (the cases where it would be
2238 // valid to replace an include with an import are highly constrained once in
2239 // named module purview; this choice avoids considerable complexity in
2240 // determining valid cases).
2241
2242 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2243
2244 if (PPOpts->SingleFileParseMode)
2245 Action = IncludeLimitReached;
2246
2247 // If we've reached the max allowed include depth, it is usually due to an
2248 // include cycle. Don't enter already processed files again as it can lead to
2249 // reaching the max allowed include depth again.
2250 if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2251 alreadyIncluded(*File))
2252 Action = IncludeLimitReached;
2253
2254 // FIXME: We do not have a good way to disambiguate C++ clang modules from
2255 // C++ standard modules (other than use/non-use of Header Units).
2256 Module *SM = SuggestedModule.getModule();
2257
2258 bool MaybeTranslateInclude =
2259 Action == Enter && File && SM && !SM->isForBuilding(getLangOpts());
2260
2261 // Maybe a usable Header Unit
2262 bool UsableHeaderUnit = false;
2263 if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
2264 if (TrackGMFState.inGMF() || IsImportDecl)
2265 UsableHeaderUnit = true;
2266 else if (!IsImportDecl) {
2267 // This is a Header Unit that we do not include-translate
2268 SuggestedModule = ModuleMap::KnownHeader();
2269 SM = nullptr;
2270 }
2271 }
2272 // Maybe a usable clang header module.
2273 bool UsableClangHeaderModule =
2274 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
2275 !SM->isHeaderUnit();
2276
2277 // Determine whether we should try to import the module for this #include, if
2278 // there is one. Don't do so if precompiled module support is disabled or we
2279 // are processing this module textually (because we're building the module).
2280 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2281 // If this include corresponds to a module but that module is
2282 // unavailable, diagnose the situation and bail out.
2283 // FIXME: Remove this; loadModule does the same check (but produces
2284 // slightly worse diagnostics).
2285 if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(),
2286 *SuggestedModule.getModule(),
2287 getDiagnostics())) {
2288 Diag(FilenameTok.getLocation(),
2289 diag::note_implicit_top_level_module_import_here)
2290 << SuggestedModule.getModule()->getTopLevelModuleName();
2291 return {ImportAction::None};
2292 }
2293
2294 // Compute the module access path corresponding to this module.
2295 // FIXME: Should we have a second loadModule() overload to avoid this
2296 // extra lookup step?
2297 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2298 for (Module *Mod = SM; Mod; Mod = Mod->Parent)
2299 Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2300 FilenameTok.getLocation()));
2301 std::reverse(Path.begin(), Path.end());
2302
2303 // Warn that we're replacing the include/import with a module import.
2304 if (!IsImportDecl)
2305 diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2306
2307 // Load the module to import its macros. We'll make the declarations
2308 // visible when the parser gets here.
2309 // FIXME: Pass SuggestedModule in here rather than converting it to a path
2310 // and making the module loader convert it back again.
2311 ModuleLoadResult Imported = TheModuleLoader.loadModule(
2312 IncludeTok.getLocation(), Path, Module::Hidden,
2313 /*IsInclusionDirective=*/true);
2314 assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
2315 "the imported module is different than the suggested one");
2316
2317 if (Imported) {
2318 Action = Import;
2319 } else if (Imported.isMissingExpected()) {
2320 markClangModuleAsAffecting(
2321 static_cast<Module *>(Imported)->getTopLevelModule());
2322 // We failed to find a submodule that we assumed would exist (because it
2323 // was in the directory of an umbrella header, for instance), but no
2324 // actual module containing it exists (because the umbrella header is
2325 // incomplete). Treat this as a textual inclusion.
2326 SuggestedModule = ModuleMap::KnownHeader();
2327 SM = nullptr;
2328 } else if (Imported.isConfigMismatch()) {
2329 // On a configuration mismatch, enter the header textually. We still know
2330 // that it's part of the corresponding module.
2331 } else {
2332 // We hit an error processing the import. Bail out.
2333 if (hadModuleLoaderFatalFailure()) {
2334 // With a fatal failure in the module loader, we abort parsing.
2335 Token &Result = IncludeTok;
2336 assert(CurLexer && "#include but no current lexer set!");
2337 Result.startToken();
2338 CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2339 CurLexer->cutOffLexing();
2340 }
2341 return {ImportAction::None};
2342 }
2343 }
2344
2345 // The #included file will be considered to be a system header if either it is
2346 // in a system include directory, or if the #includer is a system include
2347 // header.
2348 SrcMgr::CharacteristicKind FileCharacter =
2349 SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2350 if (File)
2351 FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2352
2353 // If this is a '#import' or an import-declaration, don't re-enter the file.
2354 //
2355 // FIXME: If we have a suggested module for a '#include', and we've already
2356 // visited this file, don't bother entering it again. We know it has no
2357 // further effect.
2358 bool EnterOnce =
2359 IsImportDecl ||
2360 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2361
2362 bool IsFirstIncludeOfFile = false;
2363
2364 // Ask HeaderInfo if we should enter this #include file. If not, #including
2365 // this file will have no effect.
2366 if (Action == Enter && File &&
2367 !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2368 getLangOpts().Modules, SM,
2369 IsFirstIncludeOfFile)) {
2370 // C++ standard modules:
2371 // If we are not in the GMF, then we textually include only
2372 // clang modules:
2373 // Even if we've already preprocessed this header once and know that we
2374 // don't need to see its contents again, we still need to import it if it's
2375 // modular because we might not have imported it from this submodule before.
2376 //
2377 // FIXME: We don't do this when compiling a PCH because the AST
2378 // serialization layer can't cope with it. This means we get local
2379 // submodule visibility semantics wrong in that case.
2380 if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2381 Action = TrackGMFState.inGMF() ? Import : Skip;
2382 else
2383 Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
2384 }
2385
2386 // Check for circular inclusion of the main file.
2387 // We can't generate a consistent preamble with regard to the conditional
2388 // stack if the main file is included again as due to the preamble bounds
2389 // some directives (e.g. #endif of a header guard) will never be seen.
2390 // Since this will lead to confusing errors, avoid the inclusion.
2391 if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2392 SourceMgr.isMainFile(File->getFileEntry())) {
2393 Diag(FilenameTok.getLocation(),
2394 diag::err_pp_including_mainfile_in_preamble);
2395 return {ImportAction::None};
2396 }
2397
2398 if (Callbacks && !IsImportDecl) {
2399 // Notify the callback object that we've seen an inclusion directive.
2400 // FIXME: Use a different callback for a pp-import?
2401 Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2402 FilenameRange, File, SearchPath, RelativePath,
2403 Action == Import ? SuggestedModule.getModule()
2404 : nullptr,
2405 FileCharacter);
2406 if (Action == Skip && File)
2407 Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2408 }
2409
2410 if (!File)
2411 return {ImportAction::None};
2412
2413 // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2414 // module corresponding to the named header.
2415 if (IsImportDecl && !SuggestedModule) {
2416 Diag(FilenameTok, diag::err_header_import_not_header_unit)
2417 << OriginalFilename << File->getName();
2418 return {ImportAction::None};
2419 }
2420
2421 // Issue a diagnostic if the name of the file on disk has a different case
2422 // than the one we're about to open.
2423 const bool CheckIncludePathPortability =
2424 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2425
2426 if (CheckIncludePathPortability) {
2427 StringRef Name = LookupFilename;
2428 StringRef NameWithoriginalSlashes = Filename;
2429 #if defined(_WIN32)
2430 // Skip UNC prefix if present. (tryGetRealPathName() always
2431 // returns a path with the prefix skipped.)
2432 bool NameWasUNC = Name.consume_front("\\\\?\\");
2433 NameWithoriginalSlashes.consume_front("\\\\?\\");
2434 #endif
2435 StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2436 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2437 llvm::sys::path::end(Name));
2438 #if defined(_WIN32)
2439 // -Wnonportable-include-path is designed to diagnose includes using
2440 // case even on systems with a case-insensitive file system.
2441 // On Windows, RealPathName always starts with an upper-case drive
2442 // letter for absolute paths, but Name might start with either
2443 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2444 // ("foo" will always have on-disk case, no matter which case was
2445 // used in the cd command). To not emit this warning solely for
2446 // the drive letter, whose case is dependent on if `cd` is used
2447 // with upper- or lower-case drive letters, always consider the
2448 // given drive letter case as correct for the purpose of this warning.
2449 SmallString<128> FixedDriveRealPath;
2450 if (llvm::sys::path::is_absolute(Name) &&
2451 llvm::sys::path::is_absolute(RealPathName) &&
2452 toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2453 isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2454 assert(Components.size() >= 3 && "should have drive, backslash, name");
2455 assert(Components[0].size() == 2 && "should start with drive");
2456 assert(Components[0][1] == ':' && "should have colon");
2457 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2458 RealPathName = FixedDriveRealPath;
2459 }
2460 #endif
2461
2462 if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
2463 SmallString<128> Path;
2464 Path.reserve(Name.size()+2);
2465 Path.push_back(isAngled ? '<' : '"');
2466
2467 const auto IsSep = [BackslashStyle](char c) {
2468 return llvm::sys::path::is_separator(c, BackslashStyle);
2469 };
2470
2471 for (auto Component : Components) {
2472 // On POSIX, Components will contain a single '/' as first element
2473 // exactly if Name is an absolute path.
2474 // On Windows, it will contain "C:" followed by '\' for absolute paths.
2475 // The drive letter is optional for absolute paths on Windows, but
2476 // clang currently cannot process absolute paths in #include lines that
2477 // don't have a drive.
2478 // If the first entry in Components is a directory separator,
2479 // then the code at the bottom of this loop that keeps the original
2480 // directory separator style copies it. If the second entry is
2481 // a directory separator (the C:\ case), then that separator already
2482 // got copied when the C: was processed and we want to skip that entry.
2483 if (!(Component.size() == 1 && IsSep(Component[0])))
2484 Path.append(Component);
2485 else if (Path.size() != 1)
2486 continue;
2487
2488 // Append the separator(s) the user used, or the close quote
2489 if (Path.size() > NameWithoriginalSlashes.size()) {
2490 Path.push_back(isAngled ? '>' : '"');
2491 continue;
2492 }
2493 assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2494 do
2495 Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2496 while (Path.size() <= NameWithoriginalSlashes.size() &&
2497 IsSep(NameWithoriginalSlashes[Path.size()-1]));
2498 }
2499
2500 #if defined(_WIN32)
2501 // Restore UNC prefix if it was there.
2502 if (NameWasUNC)
2503 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2504 #endif
2505
2506 // For user files and known standard headers, issue a diagnostic.
2507 // For other system headers, don't. They can be controlled separately.
2508 auto DiagId =
2509 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2510 ? diag::pp_nonportable_path
2511 : diag::pp_nonportable_system_path;
2512 Diag(FilenameTok, DiagId) << Path <<
2513 FixItHint::CreateReplacement(FilenameRange, Path);
2514 }
2515 }
2516
2517 switch (Action) {
2518 case Skip:
2519 // If we don't need to enter the file, stop now.
2520 if (SM)
2521 return {ImportAction::SkippedModuleImport, SM};
2522 return {ImportAction::None};
2523
2524 case IncludeLimitReached:
2525 // If we reached our include limit and don't want to enter any more files,
2526 // don't go any further.
2527 return {ImportAction::None};
2528
2529 case Import: {
2530 // If this is a module import, make it visible if needed.
2531 assert(SM && "no module to import");
2532
2533 makeModuleVisible(SM, EndLoc);
2534
2535 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2536 tok::pp___include_macros)
2537 return {ImportAction::None};
2538
2539 return {ImportAction::ModuleImport, SM};
2540 }
2541
2542 case Enter:
2543 break;
2544 }
2545
2546 // Check that we don't have infinite #include recursion.
2547 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2548 Diag(FilenameTok, diag::err_pp_include_too_deep);
2549 HasReachedMaxIncludeDepth = true;
2550 return {ImportAction::None};
2551 }
2552
2553 if (isAngled && isInNamedModule())
2554 Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2555 << getNamedModuleName();
2556
2557 // Look up the file, create a File ID for it.
2558 SourceLocation IncludePos = FilenameTok.getLocation();
2559 // If the filename string was the result of macro expansions, set the include
2560 // position on the file where it will be included and after the expansions.
2561 if (IncludePos.isMacroID())
2562 IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2563 FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2564 if (!FID.isValid()) {
2565 TheModuleLoader.HadFatalFailure = true;
2566 return ImportAction::Failure;
2567 }
2568
2569 // If all is good, enter the new file!
2570 if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2571 IsFirstIncludeOfFile))
2572 return {ImportAction::None};
2573
2574 // Determine if we're switching to building a new submodule, and which one.
2575 // This does not apply for C++20 modules header units.
2576 if (SM && !SM->isHeaderUnit()) {
2577 if (SM->getTopLevelModule()->ShadowingModule) {
2578 // We are building a submodule that belongs to a shadowed module. This
2579 // means we find header files in the shadowed module.
2580 Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
2581 << SM->getFullModuleName();
2582 Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2583 diag::note_previous_definition);
2584 return {ImportAction::None};
2585 }
2586 // When building a pch, -fmodule-name tells the compiler to textually
2587 // include headers in the specified module. We are not building the
2588 // specified module.
2589 //
2590 // FIXME: This is the wrong way to handle this. We should produce a PCH
2591 // that behaves the same as the header would behave in a compilation using
2592 // that PCH, which means we should enter the submodule. We need to teach
2593 // the AST serialization layer to deal with the resulting AST.
2594 if (getLangOpts().CompilingPCH && SM->isForBuilding(getLangOpts()))
2595 return {ImportAction::None};
2596
2597 assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2598 CurLexerSubmodule = SM;
2599
2600 // Let the macro handling code know that any future macros are within
2601 // the new submodule.
2602 EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
2603
2604 // Let the parser know that any future declarations are within the new
2605 // submodule.
2606 // FIXME: There's no point doing this if we're handling a #__include_macros
2607 // directive.
2608 return {ImportAction::ModuleBegin, SM};
2609 }
2610
2611 assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2612 return {ImportAction::None};
2613 }
2614
2615 /// HandleIncludeNextDirective - Implements \#include_next.
2616 ///
HandleIncludeNextDirective(SourceLocation HashLoc,Token & IncludeNextTok)2617 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2618 Token &IncludeNextTok) {
2619 Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2620
2621 ConstSearchDirIterator Lookup = nullptr;
2622 const FileEntry *LookupFromFile;
2623 std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2624
2625 return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2626 LookupFromFile);
2627 }
2628
2629 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
HandleMicrosoftImportDirective(Token & Tok)2630 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2631 // The Microsoft #import directive takes a type library and generates header
2632 // files from it, and includes those. This is beyond the scope of what clang
2633 // does, so we ignore it and error out. However, #import can optionally have
2634 // trailing attributes that span multiple lines. We're going to eat those
2635 // so we can continue processing from there.
2636 Diag(Tok, diag::err_pp_import_directive_ms );
2637
2638 // Read tokens until we get to the end of the directive. Note that the
2639 // directive can be split over multiple lines using the backslash character.
2640 DiscardUntilEndOfDirective();
2641 }
2642
2643 /// HandleImportDirective - Implements \#import.
2644 ///
HandleImportDirective(SourceLocation HashLoc,Token & ImportTok)2645 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2646 Token &ImportTok) {
2647 if (!LangOpts.ObjC) { // #import is standard for ObjC.
2648 if (LangOpts.MSVCCompat)
2649 return HandleMicrosoftImportDirective(ImportTok);
2650 Diag(ImportTok, diag::ext_pp_import_directive);
2651 }
2652 return HandleIncludeDirective(HashLoc, ImportTok);
2653 }
2654
2655 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2656 /// pseudo directive in the predefines buffer. This handles it by sucking all
2657 /// tokens through the preprocessor and discarding them (only keeping the side
2658 /// effects on the preprocessor).
HandleIncludeMacrosDirective(SourceLocation HashLoc,Token & IncludeMacrosTok)2659 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2660 Token &IncludeMacrosTok) {
2661 // This directive should only occur in the predefines buffer. If not, emit an
2662 // error and reject it.
2663 SourceLocation Loc = IncludeMacrosTok.getLocation();
2664 if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2665 Diag(IncludeMacrosTok.getLocation(),
2666 diag::pp_include_macros_out_of_predefines);
2667 DiscardUntilEndOfDirective();
2668 return;
2669 }
2670
2671 // Treat this as a normal #include for checking purposes. If this is
2672 // successful, it will push a new lexer onto the include stack.
2673 HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2674
2675 Token TmpTok;
2676 do {
2677 Lex(TmpTok);
2678 assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2679 } while (TmpTok.isNot(tok::hashhash));
2680 }
2681
2682 //===----------------------------------------------------------------------===//
2683 // Preprocessor Macro Directive Handling.
2684 //===----------------------------------------------------------------------===//
2685
2686 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2687 /// definition has just been read. Lex the rest of the parameters and the
2688 /// closing ), updating MI with what we learn. Return true if an error occurs
2689 /// parsing the param list.
ReadMacroParameterList(MacroInfo * MI,Token & Tok)2690 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2691 SmallVector<IdentifierInfo*, 32> Parameters;
2692
2693 while (true) {
2694 LexUnexpandedNonComment(Tok);
2695 switch (Tok.getKind()) {
2696 case tok::r_paren:
2697 // Found the end of the parameter list.
2698 if (Parameters.empty()) // #define FOO()
2699 return false;
2700 // Otherwise we have #define FOO(A,)
2701 Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2702 return true;
2703 case tok::ellipsis: // #define X(... -> C99 varargs
2704 if (!LangOpts.C99)
2705 Diag(Tok, LangOpts.CPlusPlus11 ?
2706 diag::warn_cxx98_compat_variadic_macro :
2707 diag::ext_variadic_macro);
2708
2709 // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2710 if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2711 Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2712 }
2713
2714 // Lex the token after the identifier.
2715 LexUnexpandedNonComment(Tok);
2716 if (Tok.isNot(tok::r_paren)) {
2717 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2718 return true;
2719 }
2720 // Add the __VA_ARGS__ identifier as a parameter.
2721 Parameters.push_back(Ident__VA_ARGS__);
2722 MI->setIsC99Varargs();
2723 MI->setParameterList(Parameters, BP);
2724 return false;
2725 case tok::eod: // #define X(
2726 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2727 return true;
2728 default:
2729 // Handle keywords and identifiers here to accept things like
2730 // #define Foo(for) for.
2731 IdentifierInfo *II = Tok.getIdentifierInfo();
2732 if (!II) {
2733 // #define X(1
2734 Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2735 return true;
2736 }
2737
2738 // If this is already used as a parameter, it is used multiple times (e.g.
2739 // #define X(A,A.
2740 if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2741 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2742 return true;
2743 }
2744
2745 // Add the parameter to the macro info.
2746 Parameters.push_back(II);
2747
2748 // Lex the token after the identifier.
2749 LexUnexpandedNonComment(Tok);
2750
2751 switch (Tok.getKind()) {
2752 default: // #define X(A B
2753 Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2754 return true;
2755 case tok::r_paren: // #define X(A)
2756 MI->setParameterList(Parameters, BP);
2757 return false;
2758 case tok::comma: // #define X(A,
2759 break;
2760 case tok::ellipsis: // #define X(A... -> GCC extension
2761 // Diagnose extension.
2762 Diag(Tok, diag::ext_named_variadic_macro);
2763
2764 // Lex the token after the identifier.
2765 LexUnexpandedNonComment(Tok);
2766 if (Tok.isNot(tok::r_paren)) {
2767 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2768 return true;
2769 }
2770
2771 MI->setIsGNUVarargs();
2772 MI->setParameterList(Parameters, BP);
2773 return false;
2774 }
2775 }
2776 }
2777 }
2778
isConfigurationPattern(Token & MacroName,MacroInfo * MI,const LangOptions & LOptions)2779 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2780 const LangOptions &LOptions) {
2781 if (MI->getNumTokens() == 1) {
2782 const Token &Value = MI->getReplacementToken(0);
2783
2784 // Macro that is identity, like '#define inline inline' is a valid pattern.
2785 if (MacroName.getKind() == Value.getKind())
2786 return true;
2787
2788 // Macro that maps a keyword to the same keyword decorated with leading/
2789 // trailing underscores is a valid pattern:
2790 // #define inline __inline
2791 // #define inline __inline__
2792 // #define inline _inline (in MS compatibility mode)
2793 StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2794 if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2795 if (!II->isKeyword(LOptions))
2796 return false;
2797 StringRef ValueText = II->getName();
2798 StringRef TrimmedValue = ValueText;
2799 if (!ValueText.starts_with("__")) {
2800 if (ValueText.starts_with("_"))
2801 TrimmedValue = TrimmedValue.drop_front(1);
2802 else
2803 return false;
2804 } else {
2805 TrimmedValue = TrimmedValue.drop_front(2);
2806 if (TrimmedValue.ends_with("__"))
2807 TrimmedValue = TrimmedValue.drop_back(2);
2808 }
2809 return TrimmedValue.equals(MacroText);
2810 } else {
2811 return false;
2812 }
2813 }
2814
2815 // #define inline
2816 return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2817 tok::kw_const) &&
2818 MI->getNumTokens() == 0;
2819 }
2820
2821 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2822 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2823 // doing so performs certain validity checks including (but not limited to):
2824 // - # (stringization) is followed by a macro parameter
2825 //
2826 // Returns a nullptr if an invalid sequence of tokens is encountered or returns
2827 // a pointer to a MacroInfo object.
2828
ReadOptionalMacroParameterListAndBody(const Token & MacroNameTok,const bool ImmediatelyAfterHeaderGuard)2829 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2830 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2831
2832 Token LastTok = MacroNameTok;
2833 // Create the new macro.
2834 MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2835
2836 Token Tok;
2837 LexUnexpandedToken(Tok);
2838
2839 // Ensure we consume the rest of the macro body if errors occur.
2840 auto _ = llvm::make_scope_exit([&]() {
2841 // The flag indicates if we are still waiting for 'eod'.
2842 if (CurLexer->ParsingPreprocessorDirective)
2843 DiscardUntilEndOfDirective();
2844 });
2845
2846 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2847 // within their appropriate context.
2848 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2849
2850 // If this is a function-like macro definition, parse the argument list,
2851 // marking each of the identifiers as being used as macro arguments. Also,
2852 // check other constraints on the first token of the macro body.
2853 if (Tok.is(tok::eod)) {
2854 if (ImmediatelyAfterHeaderGuard) {
2855 // Save this macro information since it may part of a header guard.
2856 CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2857 MacroNameTok.getLocation());
2858 }
2859 // If there is no body to this macro, we have no special handling here.
2860 } else if (Tok.hasLeadingSpace()) {
2861 // This is a normal token with leading space. Clear the leading space
2862 // marker on the first token to get proper expansion.
2863 Tok.clearFlag(Token::LeadingSpace);
2864 } else if (Tok.is(tok::l_paren)) {
2865 // This is a function-like macro definition. Read the argument list.
2866 MI->setIsFunctionLike();
2867 if (ReadMacroParameterList(MI, LastTok))
2868 return nullptr;
2869
2870 // If this is a definition of an ISO C/C++ variadic function-like macro (not
2871 // using the GNU named varargs extension) inform our variadic scope guard
2872 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2873 // allowed only within the definition of a variadic macro.
2874
2875 if (MI->isC99Varargs()) {
2876 VariadicMacroScopeGuard.enterScope();
2877 }
2878
2879 // Read the first token after the arg list for down below.
2880 LexUnexpandedToken(Tok);
2881 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2882 // C99 requires whitespace between the macro definition and the body. Emit
2883 // a diagnostic for something like "#define X+".
2884 Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2885 } else {
2886 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2887 // first character of a replacement list is not a character required by
2888 // subclause 5.2.1, then there shall be white-space separation between the
2889 // identifier and the replacement list.". 5.2.1 lists this set:
2890 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2891 // is irrelevant here.
2892 bool isInvalid = false;
2893 if (Tok.is(tok::at)) // @ is not in the list above.
2894 isInvalid = true;
2895 else if (Tok.is(tok::unknown)) {
2896 // If we have an unknown token, it is something strange like "`". Since
2897 // all of valid characters would have lexed into a single character
2898 // token of some sort, we know this is not a valid case.
2899 isInvalid = true;
2900 }
2901 if (isInvalid)
2902 Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2903 else
2904 Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2905 }
2906
2907 if (!Tok.is(tok::eod))
2908 LastTok = Tok;
2909
2910 SmallVector<Token, 16> Tokens;
2911
2912 // Read the rest of the macro body.
2913 if (MI->isObjectLike()) {
2914 // Object-like macros are very simple, just read their body.
2915 while (Tok.isNot(tok::eod)) {
2916 LastTok = Tok;
2917 Tokens.push_back(Tok);
2918 // Get the next token of the macro.
2919 LexUnexpandedToken(Tok);
2920 }
2921 } else {
2922 // Otherwise, read the body of a function-like macro. While we are at it,
2923 // check C99 6.10.3.2p1: ensure that # operators are followed by macro
2924 // parameters in function-like macro expansions.
2925
2926 VAOptDefinitionContext VAOCtx(*this);
2927
2928 while (Tok.isNot(tok::eod)) {
2929 LastTok = Tok;
2930
2931 if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
2932 Tokens.push_back(Tok);
2933
2934 if (VAOCtx.isVAOptToken(Tok)) {
2935 // If we're already within a VAOPT, emit an error.
2936 if (VAOCtx.isInVAOpt()) {
2937 Diag(Tok, diag::err_pp_vaopt_nested_use);
2938 return nullptr;
2939 }
2940 // Ensure VAOPT is followed by a '(' .
2941 LexUnexpandedToken(Tok);
2942 if (Tok.isNot(tok::l_paren)) {
2943 Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
2944 return nullptr;
2945 }
2946 Tokens.push_back(Tok);
2947 VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
2948 LexUnexpandedToken(Tok);
2949 if (Tok.is(tok::hashhash)) {
2950 Diag(Tok, diag::err_vaopt_paste_at_start);
2951 return nullptr;
2952 }
2953 continue;
2954 } else if (VAOCtx.isInVAOpt()) {
2955 if (Tok.is(tok::r_paren)) {
2956 if (VAOCtx.sawClosingParen()) {
2957 assert(Tokens.size() >= 3 &&
2958 "Must have seen at least __VA_OPT__( "
2959 "and a subsequent tok::r_paren");
2960 if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
2961 Diag(Tok, diag::err_vaopt_paste_at_end);
2962 return nullptr;
2963 }
2964 }
2965 } else if (Tok.is(tok::l_paren)) {
2966 VAOCtx.sawOpeningParen(Tok.getLocation());
2967 }
2968 }
2969 // Get the next token of the macro.
2970 LexUnexpandedToken(Tok);
2971 continue;
2972 }
2973
2974 // If we're in -traditional mode, then we should ignore stringification
2975 // and token pasting. Mark the tokens as unknown so as not to confuse
2976 // things.
2977 if (getLangOpts().TraditionalCPP) {
2978 Tok.setKind(tok::unknown);
2979 Tokens.push_back(Tok);
2980
2981 // Get the next token of the macro.
2982 LexUnexpandedToken(Tok);
2983 continue;
2984 }
2985
2986 if (Tok.is(tok::hashhash)) {
2987 // If we see token pasting, check if it looks like the gcc comma
2988 // pasting extension. We'll use this information to suppress
2989 // diagnostics later on.
2990
2991 // Get the next token of the macro.
2992 LexUnexpandedToken(Tok);
2993
2994 if (Tok.is(tok::eod)) {
2995 Tokens.push_back(LastTok);
2996 break;
2997 }
2998
2999 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3000 Tokens[Tokens.size() - 1].is(tok::comma))
3001 MI->setHasCommaPasting();
3002
3003 // Things look ok, add the '##' token to the macro.
3004 Tokens.push_back(LastTok);
3005 continue;
3006 }
3007
3008 // Our Token is a stringization operator.
3009 // Get the next token of the macro.
3010 LexUnexpandedToken(Tok);
3011
3012 // Check for a valid macro arg identifier or __VA_OPT__.
3013 if (!VAOCtx.isVAOptToken(Tok) &&
3014 (Tok.getIdentifierInfo() == nullptr ||
3015 MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3016
3017 // If this is assembler-with-cpp mode, we accept random gibberish after
3018 // the '#' because '#' is often a comment character. However, change
3019 // the kind of the token to tok::unknown so that the preprocessor isn't
3020 // confused.
3021 if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3022 LastTok.setKind(tok::unknown);
3023 Tokens.push_back(LastTok);
3024 continue;
3025 } else {
3026 Diag(Tok, diag::err_pp_stringize_not_parameter)
3027 << LastTok.is(tok::hashat);
3028 return nullptr;
3029 }
3030 }
3031
3032 // Things look ok, add the '#' and param name tokens to the macro.
3033 Tokens.push_back(LastTok);
3034
3035 // If the token following '#' is VAOPT, let the next iteration handle it
3036 // and check it for correctness, otherwise add the token and prime the
3037 // loop with the next one.
3038 if (!VAOCtx.isVAOptToken(Tok)) {
3039 Tokens.push_back(Tok);
3040 LastTok = Tok;
3041
3042 // Get the next token of the macro.
3043 LexUnexpandedToken(Tok);
3044 }
3045 }
3046 if (VAOCtx.isInVAOpt()) {
3047 assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3048 Diag(Tok, diag::err_pp_expected_after)
3049 << LastTok.getKind() << tok::r_paren;
3050 Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3051 return nullptr;
3052 }
3053 }
3054 MI->setDefinitionEndLoc(LastTok.getLocation());
3055
3056 MI->setTokens(Tokens, BP);
3057 return MI;
3058 }
3059
isObjCProtectedMacro(const IdentifierInfo * II)3060 static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3061 return II->isStr("__strong") || II->isStr("__weak") ||
3062 II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3063 }
3064
3065 /// HandleDefineDirective - Implements \#define. This consumes the entire macro
3066 /// line then lets the caller lex the next real token.
HandleDefineDirective(Token & DefineTok,const bool ImmediatelyAfterHeaderGuard)3067 void Preprocessor::HandleDefineDirective(
3068 Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3069 ++NumDefined;
3070
3071 Token MacroNameTok;
3072 bool MacroShadowsKeyword;
3073 ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3074
3075 // Error reading macro name? If so, diagnostic already issued.
3076 if (MacroNameTok.is(tok::eod))
3077 return;
3078
3079 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3080 // Issue a final pragma warning if we're defining a macro that was has been
3081 // undefined and is being redefined.
3082 if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3083 emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3084
3085 // If we are supposed to keep comments in #defines, reenable comment saving
3086 // mode.
3087 if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3088
3089 MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3090 MacroNameTok, ImmediatelyAfterHeaderGuard);
3091
3092 if (!MI) return;
3093
3094 if (MacroShadowsKeyword &&
3095 !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3096 Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3097 }
3098 // Check that there is no paste (##) operator at the beginning or end of the
3099 // replacement list.
3100 unsigned NumTokens = MI->getNumTokens();
3101 if (NumTokens != 0) {
3102 if (MI->getReplacementToken(0).is(tok::hashhash)) {
3103 Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3104 return;
3105 }
3106 if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3107 Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3108 return;
3109 }
3110 }
3111
3112 // When skipping just warn about macros that do not match.
3113 if (SkippingUntilPCHThroughHeader) {
3114 const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3115 if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3116 /*Syntactic=*/LangOpts.MicrosoftExt))
3117 Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3118 << MacroNameTok.getIdentifierInfo();
3119 // Issue the diagnostic but allow the change if msvc extensions are enabled
3120 if (!LangOpts.MicrosoftExt)
3121 return;
3122 }
3123
3124 // Finally, if this identifier already had a macro defined for it, verify that
3125 // the macro bodies are identical, and issue diagnostics if they are not.
3126 if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3127 // Final macros are hard-mode: they always warn. Even if the bodies are
3128 // identical. Even if they are in system headers. Even if they are things we
3129 // would silently allow in the past.
3130 if (MacroNameTok.getIdentifierInfo()->isFinal())
3131 emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3132
3133 // In Objective-C, ignore attempts to directly redefine the builtin
3134 // definitions of the ownership qualifiers. It's still possible to
3135 // #undef them.
3136 if (getLangOpts().ObjC &&
3137 SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3138 getPredefinesFileID() &&
3139 isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3140 // Warn if it changes the tokens.
3141 if ((!getDiagnostics().getSuppressSystemWarnings() ||
3142 !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3143 !MI->isIdenticalTo(*OtherMI, *this,
3144 /*Syntactic=*/LangOpts.MicrosoftExt)) {
3145 Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3146 }
3147 assert(!OtherMI->isWarnIfUnused());
3148 return;
3149 }
3150
3151 // It is very common for system headers to have tons of macro redefinitions
3152 // and for warnings to be disabled in system headers. If this is the case,
3153 // then don't bother calling MacroInfo::isIdenticalTo.
3154 if (!getDiagnostics().getSuppressSystemWarnings() ||
3155 !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3156
3157 if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3158 Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3159
3160 // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3161 // C++ [cpp.predefined]p4, but allow it as an extension.
3162 if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3163 Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3164 // Macros must be identical. This means all tokens and whitespace
3165 // separation must be the same. C99 6.10.3p2.
3166 else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3167 !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3168 Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3169 << MacroNameTok.getIdentifierInfo();
3170 Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3171 }
3172 }
3173 if (OtherMI->isWarnIfUnused())
3174 WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3175 }
3176
3177 DefMacroDirective *MD =
3178 appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3179
3180 assert(!MI->isUsed());
3181 // If we need warning for not using the macro, add its location in the
3182 // warn-because-unused-macro set. If it gets used it will be removed from set.
3183 if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3184 !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3185 !MacroExpansionInDirectivesOverride &&
3186 getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3187 getPredefinesFileID()) {
3188 MI->setIsWarnIfUnused(true);
3189 WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3190 }
3191
3192 // If the callbacks want to know, tell them about the macro definition.
3193 if (Callbacks)
3194 Callbacks->MacroDefined(MacroNameTok, MD);
3195
3196 // If we're in MS compatibility mode and the macro being defined is the
3197 // assert macro, implicitly add a macro definition for static_assert to work
3198 // around their broken assert.h header file in C. Only do so if there isn't
3199 // already a static_assert macro defined.
3200 if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3201 MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3202 !isMacroDefined("static_assert")) {
3203 MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3204
3205 Token Tok;
3206 Tok.startToken();
3207 Tok.setKind(tok::kw__Static_assert);
3208 Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3209 MI->setTokens({Tok}, BP);
3210 (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3211 }
3212 }
3213
3214 /// HandleUndefDirective - Implements \#undef.
3215 ///
HandleUndefDirective()3216 void Preprocessor::HandleUndefDirective() {
3217 ++NumUndefined;
3218
3219 Token MacroNameTok;
3220 ReadMacroName(MacroNameTok, MU_Undef);
3221
3222 // Error reading macro name? If so, diagnostic already issued.
3223 if (MacroNameTok.is(tok::eod))
3224 return;
3225
3226 // Check to see if this is the last token on the #undef line.
3227 CheckEndOfDirective("undef");
3228
3229 // Okay, we have a valid identifier to undef.
3230 auto *II = MacroNameTok.getIdentifierInfo();
3231 auto MD = getMacroDefinition(II);
3232 UndefMacroDirective *Undef = nullptr;
3233
3234 if (II->isFinal())
3235 emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3236
3237 // If the macro is not defined, this is a noop undef.
3238 if (const MacroInfo *MI = MD.getMacroInfo()) {
3239 if (!MI->isUsed() && MI->isWarnIfUnused())
3240 Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3241
3242 // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3243 // C++ [cpp.predefined]p4, but allow it as an extension.
3244 if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3245 Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3246
3247 if (MI->isWarnIfUnused())
3248 WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3249
3250 Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3251 }
3252
3253 // If the callbacks want to know, tell them about the macro #undef.
3254 // Note: no matter if the macro was defined or not.
3255 if (Callbacks)
3256 Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3257
3258 if (Undef)
3259 appendMacroDirective(II, Undef);
3260 }
3261
3262 //===----------------------------------------------------------------------===//
3263 // Preprocessor Conditional Directive Handling.
3264 //===----------------------------------------------------------------------===//
3265
3266 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef
3267 /// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is
3268 /// true if any tokens have been returned or pp-directives activated before this
3269 /// \#ifndef has been lexed.
3270 ///
HandleIfdefDirective(Token & Result,const Token & HashToken,bool isIfndef,bool ReadAnyTokensBeforeDirective)3271 void Preprocessor::HandleIfdefDirective(Token &Result,
3272 const Token &HashToken,
3273 bool isIfndef,
3274 bool ReadAnyTokensBeforeDirective) {
3275 ++NumIf;
3276 Token DirectiveTok = Result;
3277
3278 Token MacroNameTok;
3279 ReadMacroName(MacroNameTok);
3280
3281 // Error reading macro name? If so, diagnostic already issued.
3282 if (MacroNameTok.is(tok::eod)) {
3283 // Skip code until we get to #endif. This helps with recovery by not
3284 // emitting an error when the #endif is reached.
3285 SkipExcludedConditionalBlock(HashToken.getLocation(),
3286 DirectiveTok.getLocation(),
3287 /*Foundnonskip*/ false, /*FoundElse*/ false);
3288 return;
3289 }
3290
3291 emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3292
3293 // Check to see if this is the last token on the #if[n]def line.
3294 CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3295
3296 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3297 auto MD = getMacroDefinition(MII);
3298 MacroInfo *MI = MD.getMacroInfo();
3299
3300 if (CurPPLexer->getConditionalStackDepth() == 0) {
3301 // If the start of a top-level #ifdef and if the macro is not defined,
3302 // inform MIOpt that this might be the start of a proper include guard.
3303 // Otherwise it is some other form of unknown conditional which we can't
3304 // handle.
3305 if (!ReadAnyTokensBeforeDirective && !MI) {
3306 assert(isIfndef && "#ifdef shouldn't reach here");
3307 CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3308 } else
3309 CurPPLexer->MIOpt.EnterTopLevelConditional();
3310 }
3311
3312 // If there is a macro, process it.
3313 if (MI) // Mark it used.
3314 markMacroAsUsed(MI);
3315
3316 if (Callbacks) {
3317 if (isIfndef)
3318 Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3319 else
3320 Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3321 }
3322
3323 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3324 getSourceManager().isInMainFile(DirectiveTok.getLocation());
3325
3326 // Should we include the stuff contained by this directive?
3327 if (PPOpts->SingleFileParseMode && !MI) {
3328 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3329 // the directive blocks.
3330 CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3331 /*wasskip*/false, /*foundnonskip*/false,
3332 /*foundelse*/false);
3333 } else if (!MI == isIfndef || RetainExcludedCB) {
3334 // Yes, remember that we are inside a conditional, then lex the next token.
3335 CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3336 /*wasskip*/false, /*foundnonskip*/true,
3337 /*foundelse*/false);
3338 } else {
3339 // No, skip the contents of this block.
3340 SkipExcludedConditionalBlock(HashToken.getLocation(),
3341 DirectiveTok.getLocation(),
3342 /*Foundnonskip*/ false,
3343 /*FoundElse*/ false);
3344 }
3345 }
3346
3347 /// HandleIfDirective - Implements the \#if directive.
3348 ///
HandleIfDirective(Token & IfToken,const Token & HashToken,bool ReadAnyTokensBeforeDirective)3349 void Preprocessor::HandleIfDirective(Token &IfToken,
3350 const Token &HashToken,
3351 bool ReadAnyTokensBeforeDirective) {
3352 ++NumIf;
3353
3354 // Parse and evaluate the conditional expression.
3355 IdentifierInfo *IfNDefMacro = nullptr;
3356 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3357 const bool ConditionalTrue = DER.Conditional;
3358 // Lexer might become invalid if we hit code completion point while evaluating
3359 // expression.
3360 if (!CurPPLexer)
3361 return;
3362
3363 // If this condition is equivalent to #ifndef X, and if this is the first
3364 // directive seen, handle it for the multiple-include optimization.
3365 if (CurPPLexer->getConditionalStackDepth() == 0) {
3366 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3367 // FIXME: Pass in the location of the macro name, not the 'if' token.
3368 CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3369 else
3370 CurPPLexer->MIOpt.EnterTopLevelConditional();
3371 }
3372
3373 if (Callbacks)
3374 Callbacks->If(
3375 IfToken.getLocation(), DER.ExprRange,
3376 (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3377
3378 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3379 getSourceManager().isInMainFile(IfToken.getLocation());
3380
3381 // Should we include the stuff contained by this directive?
3382 if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3383 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3384 // the directive blocks.
3385 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3386 /*foundnonskip*/false, /*foundelse*/false);
3387 } else if (ConditionalTrue || RetainExcludedCB) {
3388 // Yes, remember that we are inside a conditional, then lex the next token.
3389 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3390 /*foundnonskip*/true, /*foundelse*/false);
3391 } else {
3392 // No, skip the contents of this block.
3393 SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3394 /*Foundnonskip*/ false,
3395 /*FoundElse*/ false);
3396 }
3397 }
3398
3399 /// HandleEndifDirective - Implements the \#endif directive.
3400 ///
HandleEndifDirective(Token & EndifToken)3401 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3402 ++NumEndif;
3403
3404 // Check that this is the whole directive.
3405 CheckEndOfDirective("endif");
3406
3407 PPConditionalInfo CondInfo;
3408 if (CurPPLexer->popConditionalLevel(CondInfo)) {
3409 // No conditionals on the stack: this is an #endif without an #if.
3410 Diag(EndifToken, diag::err_pp_endif_without_if);
3411 return;
3412 }
3413
3414 // If this the end of a top-level #endif, inform MIOpt.
3415 if (CurPPLexer->getConditionalStackDepth() == 0)
3416 CurPPLexer->MIOpt.ExitTopLevelConditional();
3417
3418 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3419 "This code should only be reachable in the non-skipping case!");
3420
3421 if (Callbacks)
3422 Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3423 }
3424
3425 /// HandleElseDirective - Implements the \#else directive.
3426 ///
HandleElseDirective(Token & Result,const Token & HashToken)3427 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3428 ++NumElse;
3429
3430 // #else directive in a non-skipping conditional... start skipping.
3431 CheckEndOfDirective("else");
3432
3433 PPConditionalInfo CI;
3434 if (CurPPLexer->popConditionalLevel(CI)) {
3435 Diag(Result, diag::pp_err_else_without_if);
3436 return;
3437 }
3438
3439 // If this is a top-level #else, inform the MIOpt.
3440 if (CurPPLexer->getConditionalStackDepth() == 0)
3441 CurPPLexer->MIOpt.EnterTopLevelConditional();
3442
3443 // If this is a #else with a #else before it, report the error.
3444 if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3445
3446 if (Callbacks)
3447 Callbacks->Else(Result.getLocation(), CI.IfLoc);
3448
3449 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3450 getSourceManager().isInMainFile(Result.getLocation());
3451
3452 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3453 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3454 // the directive blocks.
3455 CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3456 /*foundnonskip*/false, /*foundelse*/true);
3457 return;
3458 }
3459
3460 // Finally, skip the rest of the contents of this block.
3461 SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3462 /*Foundnonskip*/ true,
3463 /*FoundElse*/ true, Result.getLocation());
3464 }
3465
3466 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
HandleElifFamilyDirective(Token & ElifToken,const Token & HashToken,tok::PPKeywordKind Kind)3467 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3468 const Token &HashToken,
3469 tok::PPKeywordKind Kind) {
3470 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif
3471 : Kind == tok::pp_elifdef ? PED_Elifdef
3472 : PED_Elifndef;
3473 ++NumElse;
3474
3475 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3476 switch (DirKind) {
3477 case PED_Elifdef:
3478 case PED_Elifndef:
3479 unsigned DiagID;
3480 if (LangOpts.CPlusPlus)
3481 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3482 : diag::ext_cxx23_pp_directive;
3483 else
3484 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3485 : diag::ext_c23_pp_directive;
3486 Diag(ElifToken, DiagID) << DirKind;
3487 break;
3488 default:
3489 break;
3490 }
3491
3492 // #elif directive in a non-skipping conditional... start skipping.
3493 // We don't care what the condition is, because we will always skip it (since
3494 // the block immediately before it was included).
3495 SourceRange ConditionRange = DiscardUntilEndOfDirective();
3496
3497 PPConditionalInfo CI;
3498 if (CurPPLexer->popConditionalLevel(CI)) {
3499 Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3500 return;
3501 }
3502
3503 // If this is a top-level #elif, inform the MIOpt.
3504 if (CurPPLexer->getConditionalStackDepth() == 0)
3505 CurPPLexer->MIOpt.EnterTopLevelConditional();
3506
3507 // If this is a #elif with a #else before it, report the error.
3508 if (CI.FoundElse)
3509 Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3510
3511 if (Callbacks) {
3512 switch (Kind) {
3513 case tok::pp_elif:
3514 Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3515 PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3516 break;
3517 case tok::pp_elifdef:
3518 Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3519 break;
3520 case tok::pp_elifndef:
3521 Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3522 break;
3523 default:
3524 assert(false && "unexpected directive kind");
3525 break;
3526 }
3527 }
3528
3529 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3530 getSourceManager().isInMainFile(ElifToken.getLocation());
3531
3532 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3533 // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3534 // the directive blocks.
3535 CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3536 /*foundnonskip*/false, /*foundelse*/false);
3537 return;
3538 }
3539
3540 // Finally, skip the rest of the contents of this block.
3541 SkipExcludedConditionalBlock(
3542 HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3543 /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3544 }
3545