1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConcept.h"
16 #include "clang/AST/ASTConsumer.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/DeclObjC.h"
19 #include "clang/Basic/DiagnosticFrontend.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/ExtractAPI/API.h"
24 #include "clang/ExtractAPI/APIIgnoresList.h"
25 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
26 #include "clang/ExtractAPI/FrontendActions.h"
27 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
28 #include "clang/Frontend/ASTConsumers.h"
29 #include "clang/Frontend/CompilerInstance.h"
30 #include "clang/Frontend/FrontendOptions.h"
31 #include "clang/Frontend/MultiplexConsumer.h"
32 #include "clang/Lex/MacroInfo.h"
33 #include "clang/Lex/PPCallbacks.h"
34 #include "clang/Lex/Preprocessor.h"
35 #include "clang/Lex/PreprocessorOptions.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/STLExtras.h"
38 #include "llvm/ADT/SmallString.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Error.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/MemoryBuffer.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/Regex.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include <memory>
48 #include <optional>
49 #include <utility>
50 
51 using namespace clang;
52 using namespace extractapi;
53 
54 namespace {
55 
56 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
57                                                   StringRef File,
58                                                   bool *IsQuoted = nullptr) {
59   assert(CI.hasFileManager() &&
60          "CompilerInstance does not have a FileNamager!");
61 
62   using namespace llvm::sys;
63   // Matches framework include patterns
64   const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)");
65 
66   const auto &FS = CI.getVirtualFileSystem();
67 
68   SmallString<128> FilePath(File.begin(), File.end());
69   FS.makeAbsolute(FilePath);
70   path::remove_dots(FilePath, true);
71   FilePath = path::convert_to_slash(FilePath);
72   File = FilePath;
73 
74   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
75   // the prefix length. Otherwise return 0.
76   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
77     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
78     FS.makeAbsolute(DirPath);
79     path::remove_dots(DirPath, true);
80     Dir = DirPath;
81     for (auto NI = path::begin(File), NE = path::end(File),
82               DI = path::begin(Dir), DE = path::end(Dir);
83          /*termination condition in loop*/; ++NI, ++DI) {
84       // '.' components in File are ignored.
85       while (NI != NE && *NI == ".")
86         ++NI;
87       if (NI == NE)
88         break;
89 
90       // '.' components in Dir are ignored.
91       while (DI != DE && *DI == ".")
92         ++DI;
93 
94       // Dir is a prefix of File, up to '.' components and choice of path
95       // separators.
96       if (DI == DE)
97         return NI - path::begin(File);
98 
99       // Consider all path separators equal.
100       if (NI->size() == 1 && DI->size() == 1 &&
101           path::is_separator(NI->front()) && path::is_separator(DI->front()))
102         continue;
103 
104       // Special case Apple .sdk folders since the search path is typically a
105       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
106       // located in `iPhoneSimulator.sdk` (the real folder).
107       if (NI->endswith(".sdk") && DI->endswith(".sdk")) {
108         StringRef NBasename = path::stem(*NI);
109         StringRef DBasename = path::stem(*DI);
110         if (DBasename.startswith(NBasename))
111           continue;
112       }
113 
114       if (*NI != *DI)
115         break;
116     }
117     return 0;
118   };
119 
120   unsigned PrefixLength = 0;
121 
122   // Go through the search paths and find the first one that is a prefix of
123   // the header.
124   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
125     // Note whether the match is found in a quoted entry.
126     if (IsQuoted)
127       *IsQuoted = Entry.Group == frontend::Quoted;
128 
129     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
130       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
131         // If this is a headermap entry, try to reverse lookup the full path
132         // for a spelled name before mapping.
133         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
134         if (!SpelledFilename.empty())
135           return SpelledFilename.str();
136 
137         // No matching mapping in this headermap, try next search entry.
138         continue;
139       }
140     }
141 
142     // Entry is a directory search entry, try to check if it's a prefix of File.
143     PrefixLength = CheckDir(Entry.Path);
144     if (PrefixLength > 0) {
145       // The header is found in a framework path, construct the framework-style
146       // include name `<Framework/Header.h>`
147       if (Entry.IsFramework) {
148         SmallVector<StringRef, 4> Matches;
149         Rule.match(File, &Matches);
150         // Returned matches are always in stable order.
151         if (Matches.size() != 4)
152           return std::nullopt;
153 
154         return path::convert_to_slash(
155             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
156              Matches[3])
157                 .str());
158       }
159 
160       // The header is found in a normal search path, strip the search path
161       // prefix to get an include name.
162       return path::convert_to_slash(File.drop_front(PrefixLength));
163     }
164   }
165 
166   // Couldn't determine a include name, use full path instead.
167   return std::nullopt;
168 }
169 
170 struct LocationFileChecker {
171   bool operator()(SourceLocation Loc) {
172     // If the loc refers to a macro expansion we need to first get the file
173     // location of the expansion.
174     auto &SM = CI.getSourceManager();
175     auto FileLoc = SM.getFileLoc(Loc);
176     FileID FID = SM.getFileID(FileLoc);
177     if (FID.isInvalid())
178       return false;
179 
180     const auto *File = SM.getFileEntryForID(FID);
181     if (!File)
182       return false;
183 
184     if (KnownFileEntries.count(File))
185       return true;
186 
187     if (ExternalFileEntries.count(File))
188       return false;
189 
190     StringRef FileName = File->tryGetRealPathName().empty()
191                              ? File->getName()
192                              : File->tryGetRealPathName();
193 
194     // Try to reduce the include name the same way we tried to include it.
195     bool IsQuoted = false;
196     if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted))
197       if (llvm::any_of(KnownFiles,
198                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
199                          return KnownFile.first.equals(*IncludeName) &&
200                                 KnownFile.second == IsQuoted;
201                        })) {
202         KnownFileEntries.insert(File);
203         return true;
204       }
205 
206     // Record that the file was not found to avoid future reverse lookup for
207     // the same file.
208     ExternalFileEntries.insert(File);
209     return false;
210   }
211 
212   LocationFileChecker(const CompilerInstance &CI,
213                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
214       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
215     for (const auto &KnownFile : KnownFiles)
216       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
217         KnownFileEntries.insert(*FileEntry);
218   }
219 
220 private:
221   const CompilerInstance &CI;
222   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
223   llvm::DenseSet<const FileEntry *> KnownFileEntries;
224   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
225 };
226 
227 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> {
228   bool shouldDeclBeIncluded(const Decl *D) const {
229     bool ShouldBeIncluded = true;
230     // Check that we have the definition for redeclarable types.
231     if (auto *TD = llvm::dyn_cast<TagDecl>(D))
232       ShouldBeIncluded = TD->isThisDeclarationADefinition();
233     else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D))
234       ShouldBeIncluded = Interface->isThisDeclarationADefinition();
235     else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D))
236       ShouldBeIncluded = Protocol->isThisDeclarationADefinition();
237 
238     ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation());
239     return ShouldBeIncluded;
240   }
241 
242   BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context,
243                          APISet &API)
244       : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {}
245 
246 private:
247   LocationFileChecker &LCF;
248 };
249 
250 class WrappingExtractAPIConsumer : public ASTConsumer {
251 public:
252   WrappingExtractAPIConsumer(ASTContext &Context, APISet &API)
253       : Visitor(Context, API) {}
254 
255   void HandleTranslationUnit(ASTContext &Context) override {
256     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
257     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
258   }
259 
260 private:
261   ExtractAPIVisitor<> Visitor;
262 };
263 
264 class ExtractAPIConsumer : public ASTConsumer {
265 public:
266   ExtractAPIConsumer(ASTContext &Context,
267                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
268       : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {}
269 
270   void HandleTranslationUnit(ASTContext &Context) override {
271     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
272     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
273   }
274 
275 private:
276   BatchExtractAPIVisitor Visitor;
277   std::unique_ptr<LocationFileChecker> LCF;
278 };
279 
280 class MacroCallback : public PPCallbacks {
281 public:
282   MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP)
283       : SM(SM), API(API), PP(PP) {}
284 
285   void MacroDefined(const Token &MacroNameToken,
286                     const MacroDirective *MD) override {
287     auto *MacroInfo = MD->getMacroInfo();
288 
289     if (MacroInfo->isBuiltinMacro())
290       return;
291 
292     auto SourceLoc = MacroNameToken.getLocation();
293     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
294         SM.isWrittenInCommandLineFile(SourceLoc))
295       return;
296 
297     PendingMacros.emplace_back(MacroNameToken, MD);
298   }
299 
300   // If a macro gets undefined at some point during preprocessing of the inputs
301   // it means that it isn't an exposed API and we should therefore not add a
302   // macro definition for it.
303   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
304                       const MacroDirective *Undef) override {
305     // If this macro wasn't previously defined we don't need to do anything
306     // here.
307     if (!Undef)
308       return;
309 
310     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
311       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
312                                               /*Syntactically*/ false);
313     });
314   }
315 
316   void EndOfMainFile() override {
317     for (auto &PM : PendingMacros) {
318       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
319       // file so check for it here.
320       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
321         continue;
322 
323       if (!shouldMacroBeIncluded(PM))
324         continue;
325 
326       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
327       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
328       StringRef USR =
329           API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
330 
331       API.addMacroDefinition(
332           Name, USR, Loc,
333           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
334           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
335           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
336     }
337 
338     PendingMacros.clear();
339   }
340 
341 protected:
342   struct PendingMacro {
343     Token MacroNameToken;
344     const MacroDirective *MD;
345 
346     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
347         : MacroNameToken(MacroNameToken), MD(MD) {}
348   };
349 
350   virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; }
351 
352   const SourceManager &SM;
353   APISet &API;
354   Preprocessor &PP;
355   llvm::SmallVector<PendingMacro> PendingMacros;
356 };
357 
358 class APIMacroCallback : public MacroCallback {
359 public:
360   APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP,
361                    LocationFileChecker &LCF)
362       : MacroCallback(SM, API, PP), LCF(LCF) {}
363 
364   bool shouldMacroBeIncluded(const PendingMacro &PM) override {
365     // Do not include macros from external files
366     return LCF(PM.MacroNameToken.getLocation());
367   }
368 
369 private:
370   LocationFileChecker &LCF;
371 };
372 
373 } // namespace
374 
375 void ExtractAPIActionBase::ImplEndSourceFileAction() {
376   if (!OS)
377     return;
378 
379   // Setup a SymbolGraphSerializer to write out collected API information in
380   // the Symbol Graph format.
381   // FIXME: Make the kind of APISerializer configurable.
382   SymbolGraphSerializer SGSerializer(*API, IgnoresList);
383   SGSerializer.serialize(*OS);
384   OS.reset();
385 }
386 
387 std::unique_ptr<raw_pwrite_stream>
388 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
389   std::unique_ptr<raw_pwrite_stream> OS;
390   OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile,
391                                   /*Extension=*/"json",
392                                   /*RemoveFileOnSignal=*/false);
393   if (!OS)
394     return nullptr;
395   return OS;
396 }
397 
398 std::unique_ptr<ASTConsumer>
399 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
400   OS = CreateOutputFile(CI, InFile);
401 
402   if (!OS)
403     return nullptr;
404 
405   auto ProductName = CI.getFrontendOpts().ProductName;
406 
407   // Now that we have enough information about the language options and the
408   // target triple, let's create the APISet before anyone uses it.
409   API = std::make_unique<APISet>(
410       CI.getTarget().getTriple(),
411       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
412 
413   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
414 
415   CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>(
416       CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF));
417 
418   // Do not include location in anonymous decls.
419   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
420   Policy.AnonymousTagLocations = false;
421   CI.getASTContext().setPrintingPolicy(Policy);
422 
423   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
424     llvm::handleAllErrors(
425         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
426                                CI.getFileManager())
427             .moveInto(IgnoresList),
428         [&CI](const IgnoresFileNotFound &Err) {
429           CI.getDiagnostics().Report(
430               diag::err_extract_api_ignores_file_not_found)
431               << Err.Path;
432         });
433   }
434 
435   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
436                                               std::move(LCF), *API);
437 }
438 
439 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
440   auto &Inputs = CI.getFrontendOpts().Inputs;
441   if (Inputs.empty())
442     return true;
443 
444   if (!CI.hasFileManager())
445     if (!CI.createFileManager())
446       return false;
447 
448   auto Kind = Inputs[0].getKind();
449 
450   // Convert the header file inputs into a single input buffer.
451   SmallString<256> HeaderContents;
452   bool IsQuoted = false;
453   for (const FrontendInputFile &FIF : Inputs) {
454     if (Kind.isObjectiveC())
455       HeaderContents += "#import";
456     else
457       HeaderContents += "#include";
458 
459     StringRef FilePath = FIF.getFile();
460     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
461       if (IsQuoted)
462         HeaderContents += " \"";
463       else
464         HeaderContents += " <";
465 
466       HeaderContents += *RelativeName;
467 
468       if (IsQuoted)
469         HeaderContents += "\"\n";
470       else
471         HeaderContents += ">\n";
472       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
473                                    IsQuoted);
474     } else {
475       HeaderContents += " \"";
476       HeaderContents += FilePath;
477       HeaderContents += "\"\n";
478       KnownInputFiles.emplace_back(FilePath, true);
479     }
480   }
481 
482   if (CI.getHeaderSearchOpts().Verbose)
483     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
484                                 << HeaderContents << "\n";
485 
486   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
487                                                 getInputBufferName());
488 
489   // Set that buffer up as our "real" input in the CompilerInstance.
490   Inputs.clear();
491   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
492 
493   return true;
494 }
495 
496 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); }
497 
498 std::unique_ptr<ASTConsumer>
499 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI,
500                                             StringRef InFile) {
501   auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
502   if (!OtherConsumer)
503     return nullptr;
504 
505   CreatedASTConsumer = true;
506 
507   OS = CreateOutputFile(CI, InFile);
508   if (!OS)
509     return nullptr;
510 
511   auto ProductName = CI.getFrontendOpts().ProductName;
512 
513   // Now that we have enough information about the language options and the
514   // target triple, let's create the APISet before anyone uses it.
515   API = std::make_unique<APISet>(
516       CI.getTarget().getTriple(),
517       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
518 
519   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
520       CI.getSourceManager(), *API, CI.getPreprocessor()));
521 
522   // Do not include location in anonymous decls.
523   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
524   Policy.AnonymousTagLocations = false;
525   CI.getASTContext().setPrintingPolicy(Policy);
526 
527   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
528     llvm::handleAllErrors(
529         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
530                                CI.getFileManager())
531             .moveInto(IgnoresList),
532         [&CI](const IgnoresFileNotFound &Err) {
533           CI.getDiagnostics().Report(
534               diag::err_extract_api_ignores_file_not_found)
535               << Err.Path;
536         });
537   }
538 
539   auto WrappingConsumer =
540       std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API);
541   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
542   Consumers.push_back(std::move(OtherConsumer));
543   Consumers.push_back(std::move(WrappingConsumer));
544 
545   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
546 }
547 
548 void WrappingExtractAPIAction::EndSourceFileAction() {
549   // Invoke wrapped action's method.
550   WrapperFrontendAction::EndSourceFileAction();
551 
552   if (CreatedASTConsumer) {
553     ImplEndSourceFileAction();
554   }
555 }
556 
557 std::unique_ptr<raw_pwrite_stream>
558 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI,
559                                            StringRef InFile) {
560   std::unique_ptr<raw_pwrite_stream> OS;
561   std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir;
562 
563   // The symbol graphs need to be generated as a side effect of regular
564   // compilation so the output should be dumped in the directory provided with
565   // the command line option.
566   llvm::SmallString<128> OutFilePath(OutputDir);
567   auto Seperator = llvm::sys::path::get_separator();
568   auto Infilename = llvm::sys::path::filename(InFile);
569   OutFilePath.append({Seperator, Infilename});
570   llvm::sys::path::replace_extension(OutFilePath, "json");
571   // StringRef outputFilePathref = *OutFilePath;
572 
573   // don't use the default output file
574   OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false,
575                            /*RemoveFileOnSignal=*/true,
576                            /*UseTemporary=*/true,
577                            /*CreateMissingDirectories=*/true);
578   if (!OS)
579     return nullptr;
580   return OS;
581 }
582