1 //===--- CrossTranslationUnit.h - -------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file provides an interface to load binary AST dumps on demand. This
10 //  feature can be utilized for tools that require cross translation unit
11 //  support.
12 //
13 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
15 #define LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
16 
17 #include "clang/AST/ASTImporterSharedState.h"
18 #include "clang/Basic/LLVM.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
25 
26 namespace clang {
27 class CompilerInstance;
28 class ASTContext;
29 class ASTImporter;
30 class ASTUnit;
31 class DeclContext;
32 class FunctionDecl;
33 class VarDecl;
34 class NamedDecl;
35 class TranslationUnitDecl;
36 
37 namespace cross_tu {
38 
/// Error categories carried by IndexError.
///
/// The numeric value of every enumerator is written out; the values are the
/// same ones the implicit numbering starting at `unspecified = 1` yields.
enum class index_error_code {
  unspecified = 1,
  missing_index_file = 2,
  invalid_index_format = 3,
  multiple_definitions = 4,
  missing_definition = 5,
  failed_import = 6,
  failed_to_get_external_ast = 7,
  failed_to_generate_usr = 8,
  triple_mismatch = 9,
  lang_mismatch = 10,
  lang_dialect_mismatch = 11,
  load_threshold_reached = 12,
  invocation_list_ambiguous = 13,
  invocation_list_file_not_found = 14,
  invocation_list_empty = 15,
  invocation_list_wrong_format = 16,
  invocation_list_lookup_unsuccessful = 17
};
58 
59 class IndexError : public llvm::ErrorInfo<IndexError> {
60 public:
61   static char ID;
IndexError(index_error_code C)62   IndexError(index_error_code C) : Code(C), LineNo(0) {}
63   IndexError(index_error_code C, std::string FileName, int LineNo = 0)
Code(C)64       : Code(C), FileName(std::move(FileName)), LineNo(LineNo) {}
IndexError(index_error_code C,std::string FileName,std::string TripleToName,std::string TripleFromName)65   IndexError(index_error_code C, std::string FileName, std::string TripleToName,
66              std::string TripleFromName)
67       : Code(C), FileName(std::move(FileName)),
68         TripleToName(std::move(TripleToName)),
69         TripleFromName(std::move(TripleFromName)) {}
70   void log(raw_ostream &OS) const override;
71   std::error_code convertToErrorCode() const override;
getCode()72   index_error_code getCode() const { return Code; }
getLineNum()73   int getLineNum() const { return LineNo; }
getFileName()74   std::string getFileName() const { return FileName; }
getTripleToName()75   std::string getTripleToName() const { return TripleToName; }
getTripleFromName()76   std::string getTripleFromName() const { return TripleFromName; }
77 
78 private:
79   index_error_code Code;
80   std::string FileName;
81   int LineNo;
82   std::string TripleToName;
83   std::string TripleFromName;
84 };
85 
86 /// This function parses an index file that determines which
87 /// translation unit contains which definition. The IndexPath is not prefixed
88 /// with CTUDir, so an absolute path is expected for consistent results.
89 ///
90 /// The index file format is the following:
91 /// each line consists of an USR and a filepath separated by a space.
92 ///
93 /// \return Returns a map where the USR is the key and the filepath is the value
94 ///         or an error.
95 llvm::Expected<llvm::StringMap<std::string>>
96 parseCrossTUIndex(StringRef IndexPath);
97 
98 std::string createCrossTUIndexString(const llvm::StringMap<std::string> &Index);
99 
100 using InvocationListTy = llvm::StringMap<llvm::SmallVector<std::string, 32>>;
101 /// Parse the YAML formatted invocation list file content \p FileContent.
102 /// The format is expected to be a mapping from from absolute source file
103 /// paths in the filesystem to a list of command-line parts, which
104 /// constitute the invocation needed to compile that file. That invocation
105 /// will be used to produce the AST of the TU.
106 llvm::Expected<InvocationListTy> parseInvocationList(
107     StringRef FileContent,
108     llvm::sys::path::Style PathStyle = llvm::sys::path::Style::posix);
109 
110 // Returns true if the variable or any field of a record variable is const.
111 bool containsConst(const VarDecl *VD, const ASTContext &ACtx);
112 
113 /// This class is used for tools that requires cross translation
114 ///        unit capability.
115 ///
116 /// This class can load definitions from external AST sources.
117 /// The loaded definition will be merged back to the original AST using the
118 /// AST Importer.
119 /// In order to use this class, an index file is required that describes
120 /// the locations of the AST files for each definition.
121 ///
122 /// Note that this class also implements caching.
123 class CrossTranslationUnitContext {
124 public:
125   CrossTranslationUnitContext(CompilerInstance &CI);
126   ~CrossTranslationUnitContext();
127 
128   /// This function loads a function or variable definition from an
129   ///        external AST file and merges it into the original AST.
130   ///
131   /// This method should only be used on functions that have no definitions or
132   /// variables that have no initializer in
133   /// the current translation unit. A function definition with the same
134   /// declaration will be looked up in the index file which should be in the
135   /// \p CrossTUDir directory, called \p IndexName. In case the declaration is
136   /// found in the index the corresponding AST will be loaded and the
137   /// definition will be merged into the original AST using the AST Importer.
138   ///
139   /// \return The declaration with the definition will be returned.
140   /// If no suitable definition is found in the index file or multiple
141   /// definitions found error will be returned.
142   ///
143   /// Note that the AST files should also be in the \p CrossTUDir.
144   llvm::Expected<const FunctionDecl *>
145   getCrossTUDefinition(const FunctionDecl *FD, StringRef CrossTUDir,
146                        StringRef IndexName, bool DisplayCTUProgress = false);
147   llvm::Expected<const VarDecl *>
148   getCrossTUDefinition(const VarDecl *VD, StringRef CrossTUDir,
149                        StringRef IndexName, bool DisplayCTUProgress = false);
150 
151   /// This function loads a definition from an external AST file.
152   ///
153   /// A definition with the same declaration will be looked up in the
154   /// index file which should be in the \p CrossTUDir directory, called
155   /// \p IndexName. In case the declaration is found in the index the
156   /// corresponding AST will be loaded. If the number of TUs imported
157   /// reaches \p CTULoadTreshold, no loading is performed.
158   ///
159   /// \return Returns a pointer to the ASTUnit that contains the definition of
160   /// the looked up name or an Error.
161   /// The returned pointer is never a nullptr.
162   ///
163   /// Note that the AST files should also be in the \p CrossTUDir.
164   llvm::Expected<ASTUnit *> loadExternalAST(StringRef LookupName,
165                                             StringRef CrossTUDir,
166                                             StringRef IndexName,
167                                             bool DisplayCTUProgress = false);
168 
169   /// This function merges a definition from a separate AST Unit into
170   ///        the current one which was created by the compiler instance that
171   ///        was passed to the constructor.
172   ///
173   /// \return Returns the resulting definition or an error.
174   llvm::Expected<const FunctionDecl *> importDefinition(const FunctionDecl *FD,
175                                                         ASTUnit *Unit);
176   llvm::Expected<const VarDecl *> importDefinition(const VarDecl *VD,
177                                                    ASTUnit *Unit);
178 
179   /// Get a name to identify a named decl.
180   static llvm::Optional<std::string> getLookupName(const NamedDecl *ND);
181 
182   /// Emit diagnostics for the user for potential configuration errors.
183   void emitCrossTUDiagnostics(const IndexError &IE);
184 
185   /// Determine the original source location in the original TU for an
186   /// imported source location.
187   /// \p ToLoc Source location in the imported-to AST.
188   /// \return Source location in the imported-from AST and the corresponding
189   /// ASTUnit object (the AST was loaded from a file using an internal ASTUnit
190   /// object that is returned here).
191   /// If any error happens (ToLoc is a non-imported source location) empty is
192   /// returned.
193   llvm::Optional<std::pair<SourceLocation /*FromLoc*/, ASTUnit *>>
194   getImportedFromSourceLocation(const clang::SourceLocation &ToLoc) const;
195 
196 private:
197   using ImportedFileIDMap =
198       llvm::DenseMap<FileID, std::pair<FileID, ASTUnit *>>;
199 
200   void lazyInitImporterSharedSt(TranslationUnitDecl *ToTU);
201   ASTImporter &getOrCreateASTImporter(ASTUnit *Unit);
202   template <typename T>
203   llvm::Expected<const T *> getCrossTUDefinitionImpl(const T *D,
204                                                      StringRef CrossTUDir,
205                                                      StringRef IndexName,
206                                                      bool DisplayCTUProgress);
207   template <typename T>
208   const T *findDefInDeclContext(const DeclContext *DC,
209                                 StringRef LookupName);
210   template <typename T>
211   llvm::Expected<const T *> importDefinitionImpl(const T *D, ASTUnit *Unit);
212 
213   using ImporterMapTy =
214       llvm::DenseMap<TranslationUnitDecl *, std::unique_ptr<ASTImporter>>;
215 
216   ImporterMapTy ASTUnitImporterMap;
217 
218   ASTContext &Context;
219   std::shared_ptr<ASTImporterSharedState> ImporterSharedSt;
220   /// Map of imported FileID's (in "To" context) to FileID in "From" context
221   /// and the ASTUnit for the From context.
222   /// This map is used by getImportedFromSourceLocation to lookup a FileID and
223   /// its Preprocessor when knowing only the FileID in the 'To' context. The
224   /// FileID could be imported by any of multiple 'From' ASTImporter objects.
225   /// we do not want to loop over all ASTImporter's to find the one that
226   /// imported the FileID.
227   ImportedFileIDMap ImportedFileIDs;
228 
229   using LoadResultTy = llvm::Expected<std::unique_ptr<ASTUnit>>;
230 
231   /// Loads ASTUnits from AST-dumps or source-files.
232   class ASTLoader {
233   public:
234     ASTLoader(CompilerInstance &CI, StringRef CTUDir,
235               StringRef InvocationListFilePath);
236 
237     /// Load the ASTUnit by its identifier found in the index file. If the
238     /// indentifier is suffixed with '.ast' it is considered a dump. Otherwise
239     /// it is treated as source-file, and on-demand parsed. Relative paths are
240     /// prefixed with CTUDir.
241     LoadResultTy load(StringRef Identifier);
242 
243     /// Lazily initialize the invocation list information, which is needed for
244     /// on-demand parsing.
245     llvm::Error lazyInitInvocationList();
246 
247   private:
248     /// The style used for storage and lookup of filesystem paths.
249     /// Defaults to posix.
250     const llvm::sys::path::Style PathStyle = llvm::sys::path::Style::posix;
251 
252     /// Loads an AST from a pch-dump.
253     LoadResultTy loadFromDump(StringRef Identifier);
254     /// Loads an AST from a source-file.
255     LoadResultTy loadFromSource(StringRef Identifier);
256 
257     CompilerInstance &CI;
258     StringRef CTUDir;
259     /// The path to the file containing the invocation list, which is in YAML
260     /// format, and contains a mapping from source files to compiler invocations
261     /// that produce the AST used for analysis.
262     StringRef InvocationListFilePath;
263     /// In case of on-demand parsing, the invocations for parsing the source
264     /// files is stored.
265     llvm::Optional<InvocationListTy> InvocationList;
266   };
267 
268   /// Maintain number of AST loads and check for reaching the load limit.
269   class ASTLoadGuard {
270   public:
ASTLoadGuard(unsigned Limit)271     ASTLoadGuard(unsigned Limit) : Limit(Limit) {}
272 
273     /// Indicates, whether a new load operation is permitted, it is within the
274     /// threshold.
275     operator bool() const { return Count < Limit; }
276 
277     /// Tell that a new AST was loaded successfully.
indicateLoadSuccess()278     void indicateLoadSuccess() { ++Count; }
279 
280   private:
281     /// The number of ASTs actually imported.
282     unsigned Count{0u};
283     /// The limit (threshold) value for number of loaded ASTs.
284     const unsigned Limit;
285   };
286 
287   /// Storage and load of ASTUnits, cached access, and providing searchability
288   /// are the concerns of ASTUnitStorage class.
289   class ASTUnitStorage {
290   public:
291     ASTUnitStorage(CompilerInstance &CI);
292     /// Loads an ASTUnit for a function.
293     ///
294     /// \param FunctionName USR name of the function.
295     /// \param CrossTUDir Path to the directory used to store CTU related files.
296     /// \param IndexName Name of the file inside \p CrossTUDir which maps
297     /// function USR names to file paths. These files contain the corresponding
298     /// AST-dumps.
299     /// \param DisplayCTUProgress Display a message about loading new ASTs.
300     ///
301     /// \return An Expected instance which contains the ASTUnit pointer or the
302     /// error occured during the load.
303     llvm::Expected<ASTUnit *> getASTUnitForFunction(StringRef FunctionName,
304                                                     StringRef CrossTUDir,
305                                                     StringRef IndexName,
306                                                     bool DisplayCTUProgress);
307     /// Identifies the path of the file which can be used to load the ASTUnit
308     /// for a given function.
309     ///
310     /// \param FunctionName USR name of the function.
311     /// \param CrossTUDir Path to the directory used to store CTU related files.
312     /// \param IndexName Name of the file inside \p CrossTUDir which maps
313     /// function USR names to file paths. These files contain the corresponding
314     /// AST-dumps.
315     ///
316     /// \return An Expected instance containing the filepath.
317     llvm::Expected<std::string> getFileForFunction(StringRef FunctionName,
318                                                    StringRef CrossTUDir,
319                                                    StringRef IndexName);
320 
321   private:
322     llvm::Error ensureCTUIndexLoaded(StringRef CrossTUDir, StringRef IndexName);
323     llvm::Expected<ASTUnit *> getASTUnitForFile(StringRef FileName,
324                                                 bool DisplayCTUProgress);
325 
326     template <typename... T> using BaseMapTy = llvm::StringMap<T...>;
327     using OwningMapTy = BaseMapTy<std::unique_ptr<clang::ASTUnit>>;
328     using NonOwningMapTy = BaseMapTy<clang::ASTUnit *>;
329 
330     OwningMapTy FileASTUnitMap;
331     NonOwningMapTy NameASTUnitMap;
332 
333     using IndexMapTy = BaseMapTy<std::string>;
334     IndexMapTy NameFileMap;
335 
336     /// Loads the AST based on the identifier found in the index.
337     ASTLoader Loader;
338 
339     /// Limit the number of loaded ASTs. It is used to limit the  memory usage
340     /// of the CrossTranslationUnitContext. The ASTUnitStorage has the
341     /// information whether the AST to load is actually loaded or returned from
342     /// cache. This information is needed to maintain the counter.
343     ASTLoadGuard LoadGuard;
344   };
345 
346   ASTUnitStorage ASTStorage;
347 };
348 
349 } // namespace cross_tu
350 } // namespace clang
351 
352 #endif // LLVM_CLANG_CROSSTU_CROSSTRANSLATIONUNIT_H
353