1 //===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Defines the clang::FileManager interface and associated types.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_BASIC_FILEMANAGER_H
15 #define LLVM_CLANG_BASIC_FILEMANAGER_H
16 
17 #include "clang/Basic/FileSystemOptions.h"
18 #include "clang/Basic/LLVM.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/IntrusiveRefCntPtr.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/ErrorOr.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/VirtualFileSystem.h"
28 #include <ctime>
29 #include <map>
30 #include <memory>
31 #include <string>
32 
33 namespace llvm {
34 
35 class MemoryBuffer;
36 
37 } // end namespace llvm
38 
39 namespace clang {
40 
41 class FileSystemStatCache;
42 
43 /// Cached information about one directory (either on disk or in
44 /// the virtual file system).
45 class DirectoryEntry {
46   friend class FileManager;
47 
48   // FIXME: We should not be storing a directory entry name here.
49   StringRef Name; // Name of the directory.
50 
51 public:
52   StringRef getName() const { return Name; }
53 };
54 
55 /// A reference to a \c DirectoryEntry  that includes the name of the directory
56 /// as it was accessed by the FileManager's client.
57 class DirectoryEntryRef {
58 public:
59   const DirectoryEntry &getDirEntry() const { return *Entry->getValue(); }
60 
61   StringRef getName() const { return Entry->getKey(); }
62 
63 private:
64   friend class FileManager;
65 
66   DirectoryEntryRef(
67       llvm::StringMapEntry<llvm::ErrorOr<DirectoryEntry &>> *Entry)
68       : Entry(Entry) {}
69 
70   const llvm::StringMapEntry<llvm::ErrorOr<DirectoryEntry &>> *Entry;
71 };
72 
73 /// Cached information about one file (either on disk
74 /// or in the virtual file system).
75 ///
76 /// If the 'File' member is valid, then this FileEntry has an open file
77 /// descriptor for the file.
78 class FileEntry {
79   friend class FileManager;
80 
81   StringRef Name;             // Name of the file.
82   std::string RealPathName;   // Real path to the file; could be empty.
83   off_t Size;                 // File size in bytes.
84   time_t ModTime;             // Modification time of file.
85   const DirectoryEntry *Dir;  // Directory file lives in.
86   llvm::sys::fs::UniqueID UniqueID;
87   unsigned UID;               // A unique (small) ID for the file.
88   bool IsNamedPipe;
89   bool IsValid;               // Is this \c FileEntry initialized and valid?
90 
91   /// The open file, if it is owned by the \p FileEntry.
92   mutable std::unique_ptr<llvm::vfs::File> File;
93 
94 public:
95   FileEntry()
96       : UniqueID(0, 0), IsNamedPipe(false), IsValid(false)
97   {}
98 
99   FileEntry(const FileEntry &) = delete;
100   FileEntry &operator=(const FileEntry &) = delete;
101 
102   StringRef getName() const { return Name; }
103   StringRef tryGetRealPathName() const { return RealPathName; }
104   bool isValid() const { return IsValid; }
105   off_t getSize() const { return Size; }
106   unsigned getUID() const { return UID; }
107   const llvm::sys::fs::UniqueID &getUniqueID() const { return UniqueID; }
108   time_t getModificationTime() const { return ModTime; }
109 
110   /// Return the directory the file lives in.
111   const DirectoryEntry *getDir() const { return Dir; }
112 
113   bool operator<(const FileEntry &RHS) const { return UniqueID < RHS.UniqueID; }
114 
115   /// Check whether the file is a named pipe (and thus can't be opened by
116   /// the native FileManager methods).
117   bool isNamedPipe() const { return IsNamedPipe; }
118 
119   void closeFile() const {
120     File.reset(); // rely on destructor to close File
121   }
122 
123   // Only for use in tests to see if deferred opens are happening, rather than
124   // relying on RealPathName being empty.
125   bool isOpenForTests() const { return File != nullptr; }
126 };
127 
128 /// A reference to a \c FileEntry that includes the name of the file as it was
129 /// accessed by the FileManager's client.
130 class FileEntryRef {
131 public:
132   FileEntryRef() = delete;
133   FileEntryRef(StringRef Name, const FileEntry &Entry)
134       : Name(Name), Entry(&Entry) {}
135 
136   const StringRef getName() const { return Name; }
137 
138   bool isValid() const { return Entry->isValid(); }
139 
140   const FileEntry &getFileEntry() const { return *Entry; }
141 
142   off_t getSize() const { return Entry->getSize(); }
143 
144   unsigned getUID() const { return Entry->getUID(); }
145 
146   const llvm::sys::fs::UniqueID &getUniqueID() const {
147     return Entry->getUniqueID();
148   }
149 
150   time_t getModificationTime() const { return Entry->getModificationTime(); }
151 
152   friend bool operator==(const FileEntryRef &LHS, const FileEntryRef &RHS) {
153     return LHS.Entry == RHS.Entry && LHS.Name == RHS.Name;
154   }
155   friend bool operator!=(const FileEntryRef &LHS, const FileEntryRef &RHS) {
156     return !(LHS == RHS);
157   }
158 
159 private:
160   StringRef Name;
161   const FileEntry *Entry;
162 };
163 
164 /// Implements support for file system lookup, file system caching,
165 /// and directory search management.
166 ///
167 /// This also handles more advanced properties, such as uniquing files based
168 /// on "inode", so that a file with two names (e.g. symlinked) will be treated
169 /// as a single file.
170 ///
171 class FileManager : public RefCountedBase<FileManager> {
172   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
173   FileSystemOptions FileSystemOpts;
174 
175   /// Cache for existing real directories.
176   std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs;
177 
178   /// Cache for existing real files.
179   std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles;
180 
181   /// The virtual directories that we have allocated.
182   ///
183   /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent
184   /// directories (foo/ and foo/bar/) here.
185   SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries;
186   /// The virtual files that we have allocated.
187   SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries;
188 
189   /// A set of files that bypass the maps and uniquing.  They can have
190   /// conflicting filenames.
191   SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries;
192 
193   /// A cache that maps paths to directory entries (either real or
194   /// virtual) we have looked up, or an error that occurred when we looked up
195   /// the directory.
196   ///
197   /// The actual Entries for real directories/files are
198   /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries
199   /// for virtual directories/files are owned by
200   /// VirtualDirectoryEntries/VirtualFileEntries above.
201   ///
202   llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator>
203   SeenDirEntries;
204 
205   /// A reference to the file entry that is associated with a particular
206   /// filename, or a reference to another filename that should be looked up
207   /// instead of the accessed filename.
208   ///
209   /// The reference to another filename is specifically useful for Redirecting
210   /// VFSs that use external names. In that case, the \c FileEntryRef returned
211   /// by the \c FileManager will have the external name, and not the name that
212   /// was used to lookup the file.
213   using SeenFileEntryOrRedirect =
214       llvm::PointerUnion<FileEntry *, const StringRef *>;
215 
216   /// A cache that maps paths to file entries (either real or
217   /// virtual) we have looked up, or an error that occurred when we looked up
218   /// the file.
219   ///
220   /// \see SeenDirEntries
221   llvm::StringMap<llvm::ErrorOr<SeenFileEntryOrRedirect>,
222                   llvm::BumpPtrAllocator>
223       SeenFileEntries;
224 
225   /// The canonical names of files and directories .
226   llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames;
227 
228   /// Storage for canonical names that we have computed.
229   llvm::BumpPtrAllocator CanonicalNameStorage;
230 
231   /// Each FileEntry we create is assigned a unique ID #.
232   ///
233   unsigned NextFileUID;
234 
235   // Caching.
236   std::unique_ptr<FileSystemStatCache> StatCache;
237 
238   std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status,
239                                bool isFile,
240                                std::unique_ptr<llvm::vfs::File> *F);
241 
242   /// Add all ancestors of the given path (pointing to either a file
243   /// or a directory) as virtual directories.
244   void addAncestorsAsVirtualDirs(StringRef Path);
245 
246   /// Fills the RealPathName in file entry.
247   void fillRealPathName(FileEntry *UFE, llvm::StringRef FileName);
248 
249 public:
250   /// Construct a file manager, optionally with a custom VFS.
251   ///
252   /// \param FS if non-null, the VFS to use.  Otherwise uses
253   /// llvm::vfs::getRealFileSystem().
254   FileManager(const FileSystemOptions &FileSystemOpts,
255               IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = nullptr);
256   ~FileManager();
257 
258   /// Installs the provided FileSystemStatCache object within
259   /// the FileManager.
260   ///
261   /// Ownership of this object is transferred to the FileManager.
262   ///
263   /// \param statCache the new stat cache to install. Ownership of this
264   /// object is transferred to the FileManager.
265   void setStatCache(std::unique_ptr<FileSystemStatCache> statCache);
266 
267   /// Removes the FileSystemStatCache object from the manager.
268   void clearStatCache();
269 
270   /// Returns the number of unique real file entries cached by the file manager.
271   size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); }
272 
273   /// Lookup, cache, and verify the specified directory (real or
274   /// virtual).
275   ///
276   /// This returns a \c std::error_code if there was an error reading the
277   /// directory. On success, returns the reference to the directory entry
278   /// together with the exact path that was used to access a file by a
279   /// particular call to getDirectoryRef.
280   ///
281   /// \param CacheFailure If true and the file does not exist, we'll cache
282   /// the failure to find this file.
283   llvm::Expected<DirectoryEntryRef> getDirectoryRef(StringRef DirName,
284                                                     bool CacheFailure = true);
285 
286   /// Get a \c DirectoryEntryRef if it exists, without doing anything on error.
287   llvm::Optional<DirectoryEntryRef>
288   getOptionalDirectoryRef(StringRef DirName, bool CacheFailure = true) {
289     return llvm::expectedToOptional(getDirectoryRef(DirName, CacheFailure));
290   }
291 
292   /// Lookup, cache, and verify the specified directory (real or
293   /// virtual).
294   ///
295   /// This function is deprecated and will be removed at some point in the
296   /// future, new clients should use
297   ///  \c getDirectoryRef.
298   ///
299   /// This returns a \c std::error_code if there was an error reading the
300   /// directory. If there is no error, the DirectoryEntry is guaranteed to be
301   /// non-NULL.
302   ///
303   /// \param CacheFailure If true and the file does not exist, we'll cache
304   /// the failure to find this file.
305   llvm::ErrorOr<const DirectoryEntry *>
306   getDirectory(StringRef DirName, bool CacheFailure = true);
307 
308   /// Lookup, cache, and verify the specified file (real or
309   /// virtual).
310   ///
311   /// This function is deprecated and will be removed at some point in the
312   /// future, new clients should use
313   ///  \c getFileRef.
314   ///
315   /// This returns a \c std::error_code if there was an error loading the file.
316   /// If there is no error, the FileEntry is guaranteed to be non-NULL.
317   ///
318   /// \param OpenFile if true and the file exists, it will be opened.
319   ///
320   /// \param CacheFailure If true and the file does not exist, we'll cache
321   /// the failure to find this file.
322   llvm::ErrorOr<const FileEntry *>
323   getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true);
324 
325   /// Lookup, cache, and verify the specified file (real or virtual). Return the
326   /// reference to the file entry together with the exact path that was used to
327   /// access a file by a particular call to getFileRef. If the underlying VFS is
328   /// a redirecting VFS that uses external file names, the returned FileEntryRef
329   /// will use the external name instead of the filename that was passed to this
330   /// method.
331   ///
332   /// This returns a \c std::error_code if there was an error loading the file,
333   /// or a \c FileEntryRef otherwise.
334   ///
335   /// \param OpenFile if true and the file exists, it will be opened.
336   ///
337   /// \param CacheFailure If true and the file does not exist, we'll cache
338   /// the failure to find this file.
339   llvm::Expected<FileEntryRef> getFileRef(StringRef Filename,
340                                           bool OpenFile = false,
341                                           bool CacheFailure = true);
342 
343   /// Get a FileEntryRef if it exists, without doing anything on error.
344   llvm::Optional<FileEntryRef> getOptionalFileRef(StringRef Filename,
345                                                   bool OpenFile = false,
346                                                   bool CacheFailure = true) {
347     return llvm::expectedToOptional(
348         getFileRef(Filename, OpenFile, CacheFailure));
349   }
350 
351   /// Returns the current file system options
352   FileSystemOptions &getFileSystemOpts() { return FileSystemOpts; }
353   const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; }
354 
355   llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
356 
357   void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
358     this->FS = std::move(FS);
359   }
360 
361   /// Retrieve a file entry for a "virtual" file that acts as
362   /// if there were a file with the given name on disk.
363   ///
364   /// The file itself is not accessed.
365   const FileEntry *getVirtualFile(StringRef Filename, off_t Size,
366                                   time_t ModificationTime);
367 
368   /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual
369   /// file entry, to access the real file.  The returned FileEntry will have
370   /// the same filename as FE but a different identity and its own stat.
371   ///
372   /// This should be used only for rare error recovery paths because it
373   /// bypasses all mapping and uniquing, blindly creating a new FileEntry.
374   /// There is no attempt to deduplicate these; if you bypass the same file
375   /// twice, you get two new file entries.
376   llvm::Optional<FileEntryRef> getBypassFile(FileEntryRef VFE);
377 
378   /// Open the specified file as a MemoryBuffer, returning a new
379   /// MemoryBuffer if successful, otherwise returning null.
380   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
381   getBufferForFile(const FileEntry *Entry, bool isVolatile = false);
382   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
383   getBufferForFile(StringRef Filename, bool isVolatile = false) {
384     return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile);
385   }
386 
387 private:
388   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
389   getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile);
390 
391 public:
392   /// Get the 'stat' information for the given \p Path.
393   ///
394   /// If the path is relative, it will be resolved against the WorkingDir of the
395   /// FileManager's FileSystemOptions.
396   ///
397   /// \returns a \c std::error_code describing an error, if there was one
398   std::error_code getNoncachedStatValue(StringRef Path,
399                                         llvm::vfs::Status &Result);
400 
401   /// If path is not absolute and FileSystemOptions set the working
402   /// directory, the path is modified to be relative to the given
403   /// working directory.
404   /// \returns true if \c path changed.
405   bool FixupRelativePath(SmallVectorImpl<char> &path) const;
406 
407   /// Makes \c Path absolute taking into account FileSystemOptions and the
408   /// working directory option.
409   /// \returns true if \c Path changed to absolute.
410   bool makeAbsolutePath(SmallVectorImpl<char> &Path) const;
411 
412   /// Produce an array mapping from the unique IDs assigned to each
413   /// file to the corresponding FileEntry pointer.
414   void GetUniqueIDMapping(
415                     SmallVectorImpl<const FileEntry *> &UIDToFiles) const;
416 
417   /// Retrieve the canonical name for a given directory.
418   ///
419   /// This is a very expensive operation, despite its results being cached,
420   /// and should only be used when the physical layout of the file system is
421   /// required, which is (almost) never.
422   StringRef getCanonicalName(const DirectoryEntry *Dir);
423 
424   /// Retrieve the canonical name for a given file.
425   ///
426   /// This is a very expensive operation, despite its results being cached,
427   /// and should only be used when the physical layout of the file system is
428   /// required, which is (almost) never.
429   StringRef getCanonicalName(const FileEntry *File);
430 
431   void PrintStats() const;
432 };
433 
434 } // end namespace clang
435 
436 #endif // LLVM_CLANG_BASIC_FILEMANAGER_H
437