1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConcept.h"
16 #include "clang/AST/ASTConsumer.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/DeclObjC.h"
19 #include "clang/Basic/DiagnosticFrontend.h"
20 #include "clang/Basic/FileEntry.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Basic/TargetInfo.h"
24 #include "clang/ExtractAPI/API.h"
25 #include "clang/ExtractAPI/APIIgnoresList.h"
26 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
27 #include "clang/ExtractAPI/FrontendActions.h"
28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
29 #include "clang/Frontend/ASTConsumers.h"
30 #include "clang/Frontend/CompilerInstance.h"
31 #include "clang/Frontend/FrontendOptions.h"
32 #include "clang/Frontend/MultiplexConsumer.h"
33 #include "clang/Lex/MacroInfo.h"
34 #include "clang/Lex/PPCallbacks.h"
35 #include "clang/Lex/Preprocessor.h"
36 #include "clang/Lex/PreprocessorOptions.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallString.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Error.h"
43 #include "llvm/Support/FileSystem.h"
44 #include "llvm/Support/MemoryBuffer.h"
45 #include "llvm/Support/Path.h"
46 #include "llvm/Support/Regex.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include <memory>
49 #include <optional>
50 #include <utility>
51 
52 using namespace clang;
53 using namespace extractapi;
54 
55 namespace {
56 
57 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
58                                                   StringRef File,
59                                                   bool *IsQuoted = nullptr) {
60   assert(CI.hasFileManager() &&
61          "CompilerInstance does not have a FileNamager!");
62 
63   using namespace llvm::sys;
64   // Matches framework include patterns
65   const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)");
66 
67   const auto &FS = CI.getVirtualFileSystem();
68 
69   SmallString<128> FilePath(File.begin(), File.end());
70   FS.makeAbsolute(FilePath);
71   path::remove_dots(FilePath, true);
72   FilePath = path::convert_to_slash(FilePath);
73   File = FilePath;
74 
75   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
76   // the prefix length. Otherwise return 0.
77   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
78     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
79     FS.makeAbsolute(DirPath);
80     path::remove_dots(DirPath, true);
81     Dir = DirPath;
82     for (auto NI = path::begin(File), NE = path::end(File),
83               DI = path::begin(Dir), DE = path::end(Dir);
84          /*termination condition in loop*/; ++NI, ++DI) {
85       // '.' components in File are ignored.
86       while (NI != NE && *NI == ".")
87         ++NI;
88       if (NI == NE)
89         break;
90 
91       // '.' components in Dir are ignored.
92       while (DI != DE && *DI == ".")
93         ++DI;
94 
95       // Dir is a prefix of File, up to '.' components and choice of path
96       // separators.
97       if (DI == DE)
98         return NI - path::begin(File);
99 
100       // Consider all path separators equal.
101       if (NI->size() == 1 && DI->size() == 1 &&
102           path::is_separator(NI->front()) && path::is_separator(DI->front()))
103         continue;
104 
105       // Special case Apple .sdk folders since the search path is typically a
106       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
107       // located in `iPhoneSimulator.sdk` (the real folder).
108       if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) {
109         StringRef NBasename = path::stem(*NI);
110         StringRef DBasename = path::stem(*DI);
111         if (DBasename.starts_with(NBasename))
112           continue;
113       }
114 
115       if (*NI != *DI)
116         break;
117     }
118     return 0;
119   };
120 
121   unsigned PrefixLength = 0;
122 
123   // Go through the search paths and find the first one that is a prefix of
124   // the header.
125   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
126     // Note whether the match is found in a quoted entry.
127     if (IsQuoted)
128       *IsQuoted = Entry.Group == frontend::Quoted;
129 
130     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
131       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
132         // If this is a headermap entry, try to reverse lookup the full path
133         // for a spelled name before mapping.
134         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
135         if (!SpelledFilename.empty())
136           return SpelledFilename.str();
137 
138         // No matching mapping in this headermap, try next search entry.
139         continue;
140       }
141     }
142 
143     // Entry is a directory search entry, try to check if it's a prefix of File.
144     PrefixLength = CheckDir(Entry.Path);
145     if (PrefixLength > 0) {
146       // The header is found in a framework path, construct the framework-style
147       // include name `<Framework/Header.h>`
148       if (Entry.IsFramework) {
149         SmallVector<StringRef, 4> Matches;
150         Rule.match(File, &Matches);
151         // Returned matches are always in stable order.
152         if (Matches.size() != 4)
153           return std::nullopt;
154 
155         return path::convert_to_slash(
156             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
157              Matches[3])
158                 .str());
159       }
160 
161       // The header is found in a normal search path, strip the search path
162       // prefix to get an include name.
163       return path::convert_to_slash(File.drop_front(PrefixLength));
164     }
165   }
166 
167   // Couldn't determine a include name, use full path instead.
168   return std::nullopt;
169 }
170 
171 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
172                                                   FileEntryRef FE,
173                                                   bool *IsQuoted = nullptr) {
174   return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted);
175 }
176 
177 struct LocationFileChecker {
178   bool operator()(SourceLocation Loc) {
179     // If the loc refers to a macro expansion we need to first get the file
180     // location of the expansion.
181     auto &SM = CI.getSourceManager();
182     auto FileLoc = SM.getFileLoc(Loc);
183     FileID FID = SM.getFileID(FileLoc);
184     if (FID.isInvalid())
185       return false;
186 
187     OptionalFileEntryRef File = SM.getFileEntryRefForID(FID);
188     if (!File)
189       return false;
190 
191     if (KnownFileEntries.count(*File))
192       return true;
193 
194     if (ExternalFileEntries.count(*File))
195       return false;
196 
197     // Try to reduce the include name the same way we tried to include it.
198     bool IsQuoted = false;
199     if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted))
200       if (llvm::any_of(KnownFiles,
201                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
202                          return KnownFile.first.equals(*IncludeName) &&
203                                 KnownFile.second == IsQuoted;
204                        })) {
205         KnownFileEntries.insert(*File);
206         return true;
207       }
208 
209     // Record that the file was not found to avoid future reverse lookup for
210     // the same file.
211     ExternalFileEntries.insert(*File);
212     return false;
213   }
214 
215   LocationFileChecker(const CompilerInstance &CI,
216                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
217       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
218     for (const auto &KnownFile : KnownFiles)
219       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
220         KnownFileEntries.insert(*FileEntry);
221   }
222 
223 private:
224   const CompilerInstance &CI;
225   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
226   llvm::DenseSet<const FileEntry *> KnownFileEntries;
227   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
228 };
229 
230 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> {
231   bool shouldDeclBeIncluded(const Decl *D) const {
232     bool ShouldBeIncluded = true;
233     // Check that we have the definition for redeclarable types.
234     if (auto *TD = llvm::dyn_cast<TagDecl>(D))
235       ShouldBeIncluded = TD->isThisDeclarationADefinition();
236     else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D))
237       ShouldBeIncluded = Interface->isThisDeclarationADefinition();
238     else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D))
239       ShouldBeIncluded = Protocol->isThisDeclarationADefinition();
240 
241     ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation());
242     return ShouldBeIncluded;
243   }
244 
245   BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context,
246                          APISet &API)
247       : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {}
248 
249 private:
250   LocationFileChecker &LCF;
251 };
252 
253 class WrappingExtractAPIConsumer : public ASTConsumer {
254 public:
255   WrappingExtractAPIConsumer(ASTContext &Context, APISet &API)
256       : Visitor(Context, API) {}
257 
258   void HandleTranslationUnit(ASTContext &Context) override {
259     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
260     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
261   }
262 
263 private:
264   ExtractAPIVisitor<> Visitor;
265 };
266 
267 class ExtractAPIConsumer : public ASTConsumer {
268 public:
269   ExtractAPIConsumer(ASTContext &Context,
270                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
271       : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {}
272 
273   void HandleTranslationUnit(ASTContext &Context) override {
274     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
275     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
276   }
277 
278 private:
279   BatchExtractAPIVisitor Visitor;
280   std::unique_ptr<LocationFileChecker> LCF;
281 };
282 
283 class MacroCallback : public PPCallbacks {
284 public:
285   MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP)
286       : SM(SM), API(API), PP(PP) {}
287 
288   void MacroDefined(const Token &MacroNameToken,
289                     const MacroDirective *MD) override {
290     auto *MacroInfo = MD->getMacroInfo();
291 
292     if (MacroInfo->isBuiltinMacro())
293       return;
294 
295     auto SourceLoc = MacroNameToken.getLocation();
296     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
297         SM.isWrittenInCommandLineFile(SourceLoc))
298       return;
299 
300     PendingMacros.emplace_back(MacroNameToken, MD);
301   }
302 
303   // If a macro gets undefined at some point during preprocessing of the inputs
304   // it means that it isn't an exposed API and we should therefore not add a
305   // macro definition for it.
306   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
307                       const MacroDirective *Undef) override {
308     // If this macro wasn't previously defined we don't need to do anything
309     // here.
310     if (!Undef)
311       return;
312 
313     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
314       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
315                                               /*Syntactically*/ false);
316     });
317   }
318 
319   void EndOfMainFile() override {
320     for (auto &PM : PendingMacros) {
321       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
322       // file so check for it here.
323       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
324         continue;
325 
326       if (!shouldMacroBeIncluded(PM))
327         continue;
328 
329       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
330       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
331       StringRef USR =
332           API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
333 
334       API.addMacroDefinition(
335           Name, USR, Loc,
336           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
337           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
338           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
339     }
340 
341     PendingMacros.clear();
342   }
343 
344 protected:
345   struct PendingMacro {
346     Token MacroNameToken;
347     const MacroDirective *MD;
348 
349     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
350         : MacroNameToken(MacroNameToken), MD(MD) {}
351   };
352 
353   virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; }
354 
355   const SourceManager &SM;
356   APISet &API;
357   Preprocessor &PP;
358   llvm::SmallVector<PendingMacro> PendingMacros;
359 };
360 
361 class APIMacroCallback : public MacroCallback {
362 public:
363   APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP,
364                    LocationFileChecker &LCF)
365       : MacroCallback(SM, API, PP), LCF(LCF) {}
366 
367   bool shouldMacroBeIncluded(const PendingMacro &PM) override {
368     // Do not include macros from external files
369     return LCF(PM.MacroNameToken.getLocation());
370   }
371 
372 private:
373   LocationFileChecker &LCF;
374 };
375 
376 } // namespace
377 
378 void ExtractAPIActionBase::ImplEndSourceFileAction() {
379   if (!OS)
380     return;
381 
382   // Setup a SymbolGraphSerializer to write out collected API information in
383   // the Symbol Graph format.
384   // FIXME: Make the kind of APISerializer configurable.
385   SymbolGraphSerializer SGSerializer(*API, IgnoresList);
386   SGSerializer.serialize(*OS);
387   OS.reset();
388 }
389 
390 std::unique_ptr<raw_pwrite_stream>
391 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
392   std::unique_ptr<raw_pwrite_stream> OS;
393   OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile,
394                                   /*Extension=*/"json",
395                                   /*RemoveFileOnSignal=*/false);
396   if (!OS)
397     return nullptr;
398   return OS;
399 }
400 
401 std::unique_ptr<ASTConsumer>
402 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
403   OS = CreateOutputFile(CI, InFile);
404 
405   if (!OS)
406     return nullptr;
407 
408   auto ProductName = CI.getFrontendOpts().ProductName;
409 
410   // Now that we have enough information about the language options and the
411   // target triple, let's create the APISet before anyone uses it.
412   API = std::make_unique<APISet>(
413       CI.getTarget().getTriple(),
414       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
415 
416   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
417 
418   CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>(
419       CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF));
420 
421   // Do not include location in anonymous decls.
422   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
423   Policy.AnonymousTagLocations = false;
424   CI.getASTContext().setPrintingPolicy(Policy);
425 
426   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
427     llvm::handleAllErrors(
428         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
429                                CI.getFileManager())
430             .moveInto(IgnoresList),
431         [&CI](const IgnoresFileNotFound &Err) {
432           CI.getDiagnostics().Report(
433               diag::err_extract_api_ignores_file_not_found)
434               << Err.Path;
435         });
436   }
437 
438   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
439                                               std::move(LCF), *API);
440 }
441 
442 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
443   auto &Inputs = CI.getFrontendOpts().Inputs;
444   if (Inputs.empty())
445     return true;
446 
447   if (!CI.hasFileManager())
448     if (!CI.createFileManager())
449       return false;
450 
451   auto Kind = Inputs[0].getKind();
452 
453   // Convert the header file inputs into a single input buffer.
454   SmallString<256> HeaderContents;
455   bool IsQuoted = false;
456   for (const FrontendInputFile &FIF : Inputs) {
457     if (Kind.isObjectiveC())
458       HeaderContents += "#import";
459     else
460       HeaderContents += "#include";
461 
462     StringRef FilePath = FIF.getFile();
463     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
464       if (IsQuoted)
465         HeaderContents += " \"";
466       else
467         HeaderContents += " <";
468 
469       HeaderContents += *RelativeName;
470 
471       if (IsQuoted)
472         HeaderContents += "\"\n";
473       else
474         HeaderContents += ">\n";
475       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
476                                    IsQuoted);
477     } else {
478       HeaderContents += " \"";
479       HeaderContents += FilePath;
480       HeaderContents += "\"\n";
481       KnownInputFiles.emplace_back(FilePath, true);
482     }
483   }
484 
485   if (CI.getHeaderSearchOpts().Verbose)
486     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
487                                 << HeaderContents << "\n";
488 
489   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
490                                                 getInputBufferName());
491 
492   // Set that buffer up as our "real" input in the CompilerInstance.
493   Inputs.clear();
494   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
495 
496   return true;
497 }
498 
499 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); }
500 
501 std::unique_ptr<ASTConsumer>
502 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI,
503                                             StringRef InFile) {
504   auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
505   if (!OtherConsumer)
506     return nullptr;
507 
508   CreatedASTConsumer = true;
509 
510   OS = CreateOutputFile(CI, InFile);
511   if (!OS)
512     return nullptr;
513 
514   auto ProductName = CI.getFrontendOpts().ProductName;
515 
516   // Now that we have enough information about the language options and the
517   // target triple, let's create the APISet before anyone uses it.
518   API = std::make_unique<APISet>(
519       CI.getTarget().getTriple(),
520       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
521 
522   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
523       CI.getSourceManager(), *API, CI.getPreprocessor()));
524 
525   // Do not include location in anonymous decls.
526   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
527   Policy.AnonymousTagLocations = false;
528   CI.getASTContext().setPrintingPolicy(Policy);
529 
530   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
531     llvm::handleAllErrors(
532         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
533                                CI.getFileManager())
534             .moveInto(IgnoresList),
535         [&CI](const IgnoresFileNotFound &Err) {
536           CI.getDiagnostics().Report(
537               diag::err_extract_api_ignores_file_not_found)
538               << Err.Path;
539         });
540   }
541 
542   auto WrappingConsumer =
543       std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API);
544   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
545   Consumers.push_back(std::move(OtherConsumer));
546   Consumers.push_back(std::move(WrappingConsumer));
547 
548   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
549 }
550 
551 void WrappingExtractAPIAction::EndSourceFileAction() {
552   // Invoke wrapped action's method.
553   WrapperFrontendAction::EndSourceFileAction();
554 
555   if (CreatedASTConsumer) {
556     ImplEndSourceFileAction();
557   }
558 }
559 
560 std::unique_ptr<raw_pwrite_stream>
561 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI,
562                                            StringRef InFile) {
563   std::unique_ptr<raw_pwrite_stream> OS;
564   std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir;
565 
566   // The symbol graphs need to be generated as a side effect of regular
567   // compilation so the output should be dumped in the directory provided with
568   // the command line option.
569   llvm::SmallString<128> OutFilePath(OutputDir);
570   auto Seperator = llvm::sys::path::get_separator();
571   auto Infilename = llvm::sys::path::filename(InFile);
572   OutFilePath.append({Seperator, Infilename});
573   llvm::sys::path::replace_extension(OutFilePath, "json");
574   // StringRef outputFilePathref = *OutFilePath;
575 
576   // don't use the default output file
577   OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false,
578                            /*RemoveFileOnSignal=*/true,
579                            /*UseTemporary=*/true,
580                            /*CreateMissingDirectories=*/true);
581   if (!OS)
582     return nullptr;
583   return OS;
584 }
585