1 //===--- CrossTranslationUnit.h - -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file provides an interface to load binary AST dumps on demand. This
10 //  feature can be utilized for tools that require cross translation unit
11 //  support.
12 //
13 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
15 #define LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
16 
17 #include "clang/AST/ASTImporterSharedState.h"
18 #include "clang/Analysis/MacroExpansionContext.h"
19 #include "clang/Basic/LLVM.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/Path.h"
26 
27 namespace clang {
28 class CompilerInstance;
29 class ASTContext;
30 class ASTImporter;
31 class ASTUnit;
32 class DeclContext;
33 class FunctionDecl;
34 class VarDecl;
35 class NamedDecl;
36 class TranslationUnitDecl;
37 
38 namespace cross_tu {
39 
/// Error categories carried by IndexError.
///
/// Each enumerator spells out its numeric value so that the numbering stays
/// fixed even if entries are inserted or reordered later on.
enum class index_error_code {
  success = 0,
  unspecified = 1,

  // Index file and definition lookup.
  missing_index_file = 2,
  invalid_index_format = 3,
  multiple_definitions = 4,
  missing_definition = 5,

  // Loading and importing an external AST.
  failed_import = 6,
  failed_to_get_external_ast = 7,
  failed_to_generate_usr = 8,
  triple_mismatch = 9,
  lang_mismatch = 10,
  lang_dialect_mismatch = 11,
  load_threshold_reached = 12,

  // Invocation list handling.
  invocation_list_ambiguous = 13,
  invocation_list_file_not_found = 14,
  invocation_list_empty = 15,
  invocation_list_wrong_format = 16,
  invocation_list_lookup_unsuccessful = 17
};
60 
61 class IndexError : public llvm::ErrorInfo<IndexError> {
62 public:
63   static char ID;
64   IndexError(index_error_code C) : Code(C), LineNo(0) {}
65   IndexError(index_error_code C, std::string FileName, int LineNo = 0)
66       : Code(C), FileName(std::move(FileName)), LineNo(LineNo) {}
67   IndexError(index_error_code C, std::string FileName, std::string TripleToName,
68              std::string TripleFromName)
69       : Code(C), FileName(std::move(FileName)),
70         TripleToName(std::move(TripleToName)),
71         TripleFromName(std::move(TripleFromName)) {}
72   void log(raw_ostream &OS) const override;
73   std::error_code convertToErrorCode() const override;
74   index_error_code getCode() const { return Code; }
75   int getLineNum() const { return LineNo; }
76   std::string getFileName() const { return FileName; }
77   std::string getTripleToName() const { return TripleToName; }
78   std::string getTripleFromName() const { return TripleFromName; }
79 
80 private:
81   index_error_code Code;
82   std::string FileName;
83   int LineNo;
84   std::string TripleToName;
85   std::string TripleFromName;
86 };
87 
88 /// This function parses an index file that determines which
89 /// translation unit contains which definition. The IndexPath is not prefixed
90 /// with CTUDir, so an absolute path is expected for consistent results.
91 ///
92 /// The index file format is the following:
93 /// each line consists of an USR and a filepath separated by a space.
94 ///
95 /// \return Returns a map where the USR is the key and the filepath is the value
96 ///         or an error.
97 llvm::Expected<llvm::StringMap<std::string>>
98 parseCrossTUIndex(StringRef IndexPath);
99 
100 std::string createCrossTUIndexString(const llvm::StringMap<std::string> &Index);
101 
102 using InvocationListTy = llvm::StringMap<llvm::SmallVector<std::string, 32>>;
103 /// Parse the YAML formatted invocation list file content \p FileContent.
104 /// The format is expected to be a mapping from from absolute source file
105 /// paths in the filesystem to a list of command-line parts, which
106 /// constitute the invocation needed to compile that file. That invocation
107 /// will be used to produce the AST of the TU.
108 llvm::Expected<InvocationListTy> parseInvocationList(
109     StringRef FileContent,
110     llvm::sys::path::Style PathStyle = llvm::sys::path::Style::posix);
111 
112 /// Returns true if it makes sense to import a foreign variable definition.
113 /// For instance, we don't want to import variables that have non-trivial types
114 /// because the constructor might have side-effects.
115 bool shouldImport(const VarDecl *VD, const ASTContext &ACtx);
116 
117 /// This class is used for tools that requires cross translation
118 ///        unit capability.
119 ///
120 /// This class can load definitions from external AST sources.
121 /// The loaded definition will be merged back to the original AST using the
122 /// AST Importer.
123 /// In order to use this class, an index file is required that describes
124 /// the locations of the AST files for each definition.
125 ///
126 /// Note that this class also implements caching.
127 class CrossTranslationUnitContext {
128 public:
129   CrossTranslationUnitContext(CompilerInstance &CI);
130   ~CrossTranslationUnitContext();
131 
132   /// This function loads a function or variable definition from an
133   ///        external AST file and merges it into the original AST.
134   ///
135   /// This method should only be used on functions that have no definitions or
136   /// variables that have no initializer in
137   /// the current translation unit. A function definition with the same
138   /// declaration will be looked up in the index file which should be in the
139   /// \p CrossTUDir directory, called \p IndexName. In case the declaration is
140   /// found in the index the corresponding AST will be loaded and the
141   /// definition will be merged into the original AST using the AST Importer.
142   ///
143   /// \return The declaration with the definition will be returned.
144   /// If no suitable definition is found in the index file or multiple
145   /// definitions found error will be returned.
146   ///
147   /// Note that the AST files should also be in the \p CrossTUDir.
148   llvm::Expected<const FunctionDecl *>
149   getCrossTUDefinition(const FunctionDecl *FD, StringRef CrossTUDir,
150                        StringRef IndexName, bool DisplayCTUProgress = false);
151   llvm::Expected<const VarDecl *>
152   getCrossTUDefinition(const VarDecl *VD, StringRef CrossTUDir,
153                        StringRef IndexName, bool DisplayCTUProgress = false);
154 
155   /// This function loads a definition from an external AST file.
156   ///
157   /// A definition with the same declaration will be looked up in the
158   /// index file which should be in the \p CrossTUDir directory, called
159   /// \p IndexName. In case the declaration is found in the index the
160   /// corresponding AST will be loaded. If the number of TUs imported
161   /// reaches \p CTULoadTreshold, no loading is performed.
162   ///
163   /// \return Returns a pointer to the ASTUnit that contains the definition of
164   /// the looked up name or an Error.
165   /// The returned pointer is never a nullptr.
166   ///
167   /// Note that the AST files should also be in the \p CrossTUDir.
168   llvm::Expected<ASTUnit *> loadExternalAST(StringRef LookupName,
169                                             StringRef CrossTUDir,
170                                             StringRef IndexName,
171                                             bool DisplayCTUProgress = false);
172 
173   /// This function merges a definition from a separate AST Unit into
174   ///        the current one which was created by the compiler instance that
175   ///        was passed to the constructor.
176   ///
177   /// \return Returns the resulting definition or an error.
178   llvm::Expected<const FunctionDecl *> importDefinition(const FunctionDecl *FD,
179                                                         ASTUnit *Unit);
180   llvm::Expected<const VarDecl *> importDefinition(const VarDecl *VD,
181                                                    ASTUnit *Unit);
182 
183   /// Get a name to identify a named decl.
184   static llvm::Optional<std::string> getLookupName(const NamedDecl *ND);
185 
186   /// Emit diagnostics for the user for potential configuration errors.
187   void emitCrossTUDiagnostics(const IndexError &IE);
188 
189   /// Returns the MacroExpansionContext for the imported TU to which the given
190   /// source-location corresponds.
191   /// \p ToLoc Source location in the imported-to AST.
192   /// \note If any error happens such as \p ToLoc is a non-imported
193   ///       source-location, empty is returned.
194   /// \note Macro expansion tracking for imported TUs is not implemented yet.
195   ///       It returns empty unconditionally.
196   llvm::Optional<clang::MacroExpansionContext>
197   getMacroExpansionContextForSourceLocation(
198       const clang::SourceLocation &ToLoc) const;
199 
200   /// Returns true if the given Decl is newly created during the import.
201   bool isImportedAsNew(const Decl *ToDecl) const;
202 
203   /// Returns true if the given Decl is mapped (or created) during an import
204   /// but there was an unrecoverable error (the AST node cannot be erased, it
205   /// is marked with an Error object in this case).
206   bool hasError(const Decl *ToDecl) const;
207 
208 private:
209   void lazyInitImporterSharedSt(TranslationUnitDecl *ToTU);
210   ASTImporter &getOrCreateASTImporter(ASTUnit *Unit);
211   template <typename T>
212   llvm::Expected<const T *> getCrossTUDefinitionImpl(const T *D,
213                                                      StringRef CrossTUDir,
214                                                      StringRef IndexName,
215                                                      bool DisplayCTUProgress);
216   template <typename T>
217   const T *findDefInDeclContext(const DeclContext *DC,
218                                 StringRef LookupName);
219   template <typename T>
220   llvm::Expected<const T *> importDefinitionImpl(const T *D, ASTUnit *Unit);
221 
222   using ImporterMapTy =
223       llvm::DenseMap<TranslationUnitDecl *, std::unique_ptr<ASTImporter>>;
224 
225   ImporterMapTy ASTUnitImporterMap;
226 
227   ASTContext &Context;
228   std::shared_ptr<ASTImporterSharedState> ImporterSharedSt;
229 
230   using LoadResultTy = llvm::Expected<std::unique_ptr<ASTUnit>>;
231 
232   /// Loads ASTUnits from AST-dumps or source-files.
233   class ASTLoader {
234   public:
235     ASTLoader(CompilerInstance &CI, StringRef CTUDir,
236               StringRef InvocationListFilePath);
237 
238     /// Load the ASTUnit by its identifier found in the index file. If the
239     /// identifier is suffixed with '.ast' it is considered a dump. Otherwise
240     /// it is treated as source-file, and on-demand parsed. Relative paths are
241     /// prefixed with CTUDir.
242     LoadResultTy load(StringRef Identifier);
243 
244     /// Lazily initialize the invocation list information, which is needed for
245     /// on-demand parsing.
246     llvm::Error lazyInitInvocationList();
247 
248   private:
249     /// The style used for storage and lookup of filesystem paths.
250     /// Defaults to posix.
251     const llvm::sys::path::Style PathStyle = llvm::sys::path::Style::posix;
252 
253     /// Loads an AST from a pch-dump.
254     LoadResultTy loadFromDump(StringRef Identifier);
255     /// Loads an AST from a source-file.
256     LoadResultTy loadFromSource(StringRef Identifier);
257 
258     CompilerInstance &CI;
259     StringRef CTUDir;
260     /// The path to the file containing the invocation list, which is in YAML
261     /// format, and contains a mapping from source files to compiler invocations
262     /// that produce the AST used for analysis.
263     StringRef InvocationListFilePath;
264     /// In case of on-demand parsing, the invocations for parsing the source
265     /// files is stored.
266     llvm::Optional<InvocationListTy> InvocationList;
267     index_error_code PreviousParsingResult = index_error_code::success;
268   };
269 
270   /// Maintain number of AST loads and check for reaching the load limit.
271   class ASTLoadGuard {
272   public:
273     ASTLoadGuard(unsigned Limit) : Limit(Limit) {}
274 
275     /// Indicates, whether a new load operation is permitted, it is within the
276     /// threshold.
277     operator bool() const { return Count < Limit; }
278 
279     /// Tell that a new AST was loaded successfully.
280     void indicateLoadSuccess() { ++Count; }
281 
282   private:
283     /// The number of ASTs actually imported.
284     unsigned Count{0u};
285     /// The limit (threshold) value for number of loaded ASTs.
286     const unsigned Limit;
287   };
288 
289   /// Storage and load of ASTUnits, cached access, and providing searchability
290   /// are the concerns of ASTUnitStorage class.
291   class ASTUnitStorage {
292   public:
293     ASTUnitStorage(CompilerInstance &CI);
294     /// Loads an ASTUnit for a function.
295     ///
296     /// \param FunctionName USR name of the function.
297     /// \param CrossTUDir Path to the directory used to store CTU related files.
298     /// \param IndexName Name of the file inside \p CrossTUDir which maps
299     /// function USR names to file paths. These files contain the corresponding
300     /// AST-dumps.
301     /// \param DisplayCTUProgress Display a message about loading new ASTs.
302     ///
303     /// \return An Expected instance which contains the ASTUnit pointer or the
304     /// error occurred during the load.
305     llvm::Expected<ASTUnit *> getASTUnitForFunction(StringRef FunctionName,
306                                                     StringRef CrossTUDir,
307                                                     StringRef IndexName,
308                                                     bool DisplayCTUProgress);
309     /// Identifies the path of the file which can be used to load the ASTUnit
310     /// for a given function.
311     ///
312     /// \param FunctionName USR name of the function.
313     /// \param CrossTUDir Path to the directory used to store CTU related files.
314     /// \param IndexName Name of the file inside \p CrossTUDir which maps
315     /// function USR names to file paths. These files contain the corresponding
316     /// AST-dumps.
317     ///
318     /// \return An Expected instance containing the filepath.
319     llvm::Expected<std::string> getFileForFunction(StringRef FunctionName,
320                                                    StringRef CrossTUDir,
321                                                    StringRef IndexName);
322 
323   private:
324     llvm::Error ensureCTUIndexLoaded(StringRef CrossTUDir, StringRef IndexName);
325     llvm::Expected<ASTUnit *> getASTUnitForFile(StringRef FileName,
326                                                 bool DisplayCTUProgress);
327 
328     template <typename... T> using BaseMapTy = llvm::StringMap<T...>;
329     using OwningMapTy = BaseMapTy<std::unique_ptr<clang::ASTUnit>>;
330     using NonOwningMapTy = BaseMapTy<clang::ASTUnit *>;
331 
332     OwningMapTy FileASTUnitMap;
333     NonOwningMapTy NameASTUnitMap;
334 
335     using IndexMapTy = BaseMapTy<std::string>;
336     IndexMapTy NameFileMap;
337 
338     /// Loads the AST based on the identifier found in the index.
339     ASTLoader Loader;
340 
341     /// Limit the number of loaded ASTs. It is used to limit the  memory usage
342     /// of the CrossTranslationUnitContext. The ASTUnitStorage has the
343     /// information whether the AST to load is actually loaded or returned from
344     /// cache. This information is needed to maintain the counter.
345     ASTLoadGuard LoadGuard;
346   };
347 
348   ASTUnitStorage ASTStorage;
349 };
350 
351 } // namespace cross_tu
352 } // namespace clang
353 
354 #endif // LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
355