1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file implements the ExtractAPIAction and the ASTConsumer it uses to
/// collect API information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConsumer.h"
16 #include "clang/AST/ASTContext.h"
17 #include "clang/Basic/DiagnosticFrontend.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Basic/TargetInfo.h"
21 #include "clang/ExtractAPI/API.h"
22 #include "clang/ExtractAPI/APIIgnoresList.h"
23 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
24 #include "clang/ExtractAPI/FrontendActions.h"
25 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
26 #include "clang/Frontend/ASTConsumers.h"
27 #include "clang/Frontend/CompilerInstance.h"
28 #include "clang/Frontend/FrontendOptions.h"
29 #include "clang/Lex/MacroInfo.h"
30 #include "clang/Lex/PPCallbacks.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Lex/PreprocessorOptions.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileSystem.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/Regex.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <memory>
43 #include <optional>
44 #include <utility>
45 
46 using namespace clang;
47 using namespace extractapi;
48 
49 namespace {
50 
51 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
52                                                   StringRef File,
53                                                   bool *IsQuoted = nullptr) {
54   assert(CI.hasFileManager() &&
55          "CompilerInstance does not have a FileNamager!");
56 
57   using namespace llvm::sys;
58   // Matches framework include patterns
59   const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)");
60 
61   const auto &FS = CI.getVirtualFileSystem();
62 
63   SmallString<128> FilePath(File.begin(), File.end());
64   FS.makeAbsolute(FilePath);
65   path::remove_dots(FilePath, true);
66   FilePath = path::convert_to_slash(FilePath);
67   File = FilePath;
68 
69   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
70   // the prefix length. Otherwise return 0.
71   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
72     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
73     FS.makeAbsolute(DirPath);
74     path::remove_dots(DirPath, true);
75     Dir = DirPath;
76     for (auto NI = path::begin(File), NE = path::end(File),
77               DI = path::begin(Dir), DE = path::end(Dir);
78          /*termination condition in loop*/; ++NI, ++DI) {
79       // '.' components in File are ignored.
80       while (NI != NE && *NI == ".")
81         ++NI;
82       if (NI == NE)
83         break;
84 
85       // '.' components in Dir are ignored.
86       while (DI != DE && *DI == ".")
87         ++DI;
88 
89       // Dir is a prefix of File, up to '.' components and choice of path
90       // separators.
91       if (DI == DE)
92         return NI - path::begin(File);
93 
94       // Consider all path separators equal.
95       if (NI->size() == 1 && DI->size() == 1 &&
96           path::is_separator(NI->front()) && path::is_separator(DI->front()))
97         continue;
98 
99       // Special case Apple .sdk folders since the search path is typically a
100       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
101       // located in `iPhoneSimulator.sdk` (the real folder).
102       if (NI->endswith(".sdk") && DI->endswith(".sdk")) {
103         StringRef NBasename = path::stem(*NI);
104         StringRef DBasename = path::stem(*DI);
105         if (DBasename.startswith(NBasename))
106           continue;
107       }
108 
109       if (*NI != *DI)
110         break;
111     }
112     return 0;
113   };
114 
115   unsigned PrefixLength = 0;
116 
117   // Go through the search paths and find the first one that is a prefix of
118   // the header.
119   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
120     // Note whether the match is found in a quoted entry.
121     if (IsQuoted)
122       *IsQuoted = Entry.Group == frontend::Quoted;
123 
124     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
125       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
126         // If this is a headermap entry, try to reverse lookup the full path
127         // for a spelled name before mapping.
128         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
129         if (!SpelledFilename.empty())
130           return SpelledFilename.str();
131 
132         // No matching mapping in this headermap, try next search entry.
133         continue;
134       }
135     }
136 
137     // Entry is a directory search entry, try to check if it's a prefix of File.
138     PrefixLength = CheckDir(Entry.Path);
139     if (PrefixLength > 0) {
140       // The header is found in a framework path, construct the framework-style
141       // include name `<Framework/Header.h>`
142       if (Entry.IsFramework) {
143         SmallVector<StringRef, 4> Matches;
144         Rule.match(File, &Matches);
145         // Returned matches are always in stable order.
146         if (Matches.size() != 4)
147           return std::nullopt;
148 
149         return path::convert_to_slash(
150             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
151              Matches[3])
152                 .str());
153       }
154 
155       // The header is found in a normal search path, strip the search path
156       // prefix to get an include name.
157       return path::convert_to_slash(File.drop_front(PrefixLength));
158     }
159   }
160 
161   // Couldn't determine a include name, use full path instead.
162   return std::nullopt;
163 }
164 
165 struct LocationFileChecker {
166   bool operator()(SourceLocation Loc) {
167     // If the loc refers to a macro expansion we need to first get the file
168     // location of the expansion.
169     auto &SM = CI.getSourceManager();
170     auto FileLoc = SM.getFileLoc(Loc);
171     FileID FID = SM.getFileID(FileLoc);
172     if (FID.isInvalid())
173       return false;
174 
175     const auto *File = SM.getFileEntryForID(FID);
176     if (!File)
177       return false;
178 
179     if (KnownFileEntries.count(File))
180       return true;
181 
182     if (ExternalFileEntries.count(File))
183       return false;
184 
185     StringRef FileName = File->tryGetRealPathName().empty()
186                              ? File->getName()
187                              : File->tryGetRealPathName();
188 
189     // Try to reduce the include name the same way we tried to include it.
190     bool IsQuoted = false;
191     if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted))
192       if (llvm::any_of(KnownFiles,
193                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
194                          return KnownFile.first.equals(*IncludeName) &&
195                                 KnownFile.second == IsQuoted;
196                        })) {
197         KnownFileEntries.insert(File);
198         return true;
199       }
200 
201     // Record that the file was not found to avoid future reverse lookup for
202     // the same file.
203     ExternalFileEntries.insert(File);
204     return false;
205   }
206 
207   LocationFileChecker(const CompilerInstance &CI,
208                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
209       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
210     for (const auto &KnownFile : KnownFiles)
211       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
212         KnownFileEntries.insert(*FileEntry);
213   }
214 
215 private:
216   const CompilerInstance &CI;
217   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
218   llvm::DenseSet<const FileEntry *> KnownFileEntries;
219   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
220 };
221 
222 class ExtractAPIConsumer : public ASTConsumer {
223 public:
224   ExtractAPIConsumer(ASTContext &Context,
225                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
226       : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {}
227 
228   void HandleTranslationUnit(ASTContext &Context) override {
229     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
230     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
231   }
232 
233 private:
234   ExtractAPIVisitor Visitor;
235   std::unique_ptr<LocationFileChecker> LCF;
236 };
237 
238 class MacroCallback : public PPCallbacks {
239 public:
240   MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API,
241                 Preprocessor &PP)
242       : SM(SM), LCF(LCF), API(API), PP(PP) {}
243 
244   void MacroDefined(const Token &MacroNameToken,
245                     const MacroDirective *MD) override {
246     auto *MacroInfo = MD->getMacroInfo();
247 
248     if (MacroInfo->isBuiltinMacro())
249       return;
250 
251     auto SourceLoc = MacroNameToken.getLocation();
252     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
253         SM.isWrittenInCommandLineFile(SourceLoc))
254       return;
255 
256     PendingMacros.emplace_back(MacroNameToken, MD);
257   }
258 
259   // If a macro gets undefined at some point during preprocessing of the inputs
260   // it means that it isn't an exposed API and we should therefore not add a
261   // macro definition for it.
262   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
263                       const MacroDirective *Undef) override {
264     // If this macro wasn't previously defined we don't need to do anything
265     // here.
266     if (!Undef)
267       return;
268 
269     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
270       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
271                                               /*Syntactically*/ false);
272     });
273   }
274 
275   void EndOfMainFile() override {
276     for (auto &PM : PendingMacros) {
277       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
278       // file so check for it here.
279       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
280         continue;
281 
282       if (!LCF(PM.MacroNameToken.getLocation()))
283         continue;
284 
285       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
286       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
287       StringRef USR =
288           API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
289 
290       API.addMacroDefinition(
291           Name, USR, Loc,
292           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
293           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
294           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
295     }
296 
297     PendingMacros.clear();
298   }
299 
300 private:
301   struct PendingMacro {
302     Token MacroNameToken;
303     const MacroDirective *MD;
304 
305     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
306         : MacroNameToken(MacroNameToken), MD(MD) {}
307   };
308 
309   const SourceManager &SM;
310   LocationFileChecker &LCF;
311   APISet &API;
312   Preprocessor &PP;
313   llvm::SmallVector<PendingMacro> PendingMacros;
314 };
315 
316 } // namespace
317 
318 std::unique_ptr<ASTConsumer>
319 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
320   OS = CreateOutputFile(CI, InFile);
321   if (!OS)
322     return nullptr;
323 
324   auto ProductName = CI.getFrontendOpts().ProductName;
325 
326   // Now that we have enough information about the language options and the
327   // target triple, let's create the APISet before anyone uses it.
328   API = std::make_unique<APISet>(
329       CI.getTarget().getTriple(),
330       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
331 
332   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
333 
334   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
335       CI.getSourceManager(), *LCF, *API, CI.getPreprocessor()));
336 
337   // Do not include location in anonymous decls.
338   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
339   Policy.AnonymousTagLocations = false;
340   CI.getASTContext().setPrintingPolicy(Policy);
341 
342   if (!CI.getFrontendOpts().ExtractAPIIgnoresFile.empty()) {
343     llvm::handleAllErrors(
344         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFile,
345                                CI.getFileManager())
346             .moveInto(IgnoresList),
347         [&CI](const IgnoresFileNotFound &Err) {
348           CI.getDiagnostics().Report(
349               diag::err_extract_api_ignores_file_not_found)
350               << Err.Path;
351         });
352   }
353 
354   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
355                                               std::move(LCF), *API);
356 }
357 
358 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
359   auto &Inputs = CI.getFrontendOpts().Inputs;
360   if (Inputs.empty())
361     return true;
362 
363   if (!CI.hasFileManager())
364     if (!CI.createFileManager())
365       return false;
366 
367   auto Kind = Inputs[0].getKind();
368 
369   // Convert the header file inputs into a single input buffer.
370   SmallString<256> HeaderContents;
371   bool IsQuoted = false;
372   for (const FrontendInputFile &FIF : Inputs) {
373     if (Kind.isObjectiveC())
374       HeaderContents += "#import";
375     else
376       HeaderContents += "#include";
377 
378     StringRef FilePath = FIF.getFile();
379     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
380       if (IsQuoted)
381         HeaderContents += " \"";
382       else
383         HeaderContents += " <";
384 
385       HeaderContents += *RelativeName;
386 
387       if (IsQuoted)
388         HeaderContents += "\"\n";
389       else
390         HeaderContents += ">\n";
391       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
392                                    IsQuoted);
393     } else {
394       HeaderContents += " \"";
395       HeaderContents += FilePath;
396       HeaderContents += "\"\n";
397       KnownInputFiles.emplace_back(FilePath, true);
398     }
399   }
400 
401   if (CI.getHeaderSearchOpts().Verbose)
402     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
403                                 << HeaderContents << "\n";
404 
405   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
406                                                 getInputBufferName());
407 
408   // Set that buffer up as our "real" input in the CompilerInstance.
409   Inputs.clear();
410   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
411 
412   return true;
413 }
414 
415 void ExtractAPIAction::EndSourceFileAction() {
416   if (!OS)
417     return;
418 
419   // Setup a SymbolGraphSerializer to write out collected API information in
420   // the Symbol Graph format.
421   // FIXME: Make the kind of APISerializer configurable.
422   SymbolGraphSerializer SGSerializer(*API, IgnoresList);
423   SGSerializer.serialize(*OS);
424   OS.reset();
425 }
426 
427 std::unique_ptr<raw_pwrite_stream>
428 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
429   std::unique_ptr<raw_pwrite_stream> OS =
430       CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"json",
431                                  /*RemoveFileOnSignal=*/false);
432   if (!OS)
433     return nullptr;
434   return OS;
435 }
436