1 //===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Defines the clang::FileManager interface and associated types.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_BASIC_FILEMANAGER_H
15 #define LLVM_CLANG_BASIC_FILEMANAGER_H
16 
17 #include "clang/Basic/FileSystemOptions.h"
18 #include "clang/Basic/LLVM.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/IntrusiveRefCntPtr.h"
21 #include "llvm/ADT/PointerUnion.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/ErrorOr.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/VirtualFileSystem.h"
29 #include <ctime>
30 #include <map>
31 #include <memory>
32 #include <string>
33 
34 namespace llvm {
35 
36 class MemoryBuffer;
37 
38 } // end namespace llvm
39 
40 namespace clang {
41 
42 class FileSystemStatCache;
43 
44 /// Cache record for a single directory, whether it lives on disk or only in
45 /// the virtual file system.
46 class DirectoryEntry {
47 public:
48   /// Returns the stored name of the directory.
49   StringRef getName() const { return Name; }
50 
51 private:
52   friend class FileManager; // Granted access to the private state below.
53   StringRef Name; // FIXME: We should not be storing the directory name here.
54 };
55 
56 /// Pairs a \c DirectoryEntry with the name of the directory exactly as the
57 /// FileManager's client used it to access that directory.
58 class DirectoryEntryRef {
59   friend class FileManager;
60   /// Type of the map slot a reference points at (key: name, value: entry).
61   using MapEntryTy = llvm::StringMapEntry<llvm::ErrorOr<DirectoryEntry &>>;
62   const MapEntryTy *Entry;
63 
64   /// Private, so only the befriended FileManager can create references.
65   DirectoryEntryRef(MapEntryTy *MapEntry) : Entry(MapEntry) {}
66 
67 public:
68   /// Returns the entry stored as the value of the underlying map slot.
69   const DirectoryEntry &getDirEntry() const { return *Entry->getValue(); }
70   /// Returns the name stored as the key of the underlying map slot.
71   StringRef getName() const { return Entry->getKey(); }
72 };
73 
74 /// Cached information about one file (either on disk
75 /// or in the virtual file system).
76 ///
77 /// If the 'File' member is valid, then this FileEntry has an open file
78 /// descriptor for the file.
79 class FileEntry {
80   friend class FileManager;
81 
82   StringRef Name;             // Name of the file.
83   std::string RealPathName;   // Real path to the file; could be empty.
84   off_t Size = 0;             // File size in bytes.
85   time_t ModTime = 0;         // Modification time of file.
86   const DirectoryEntry *Dir = nullptr; // Directory file lives in.
87   llvm::sys::fs::UniqueID UniqueID;    // Set to (0, 0) by the constructor.
88   unsigned UID = 0;           // A unique (small) ID for the file.
89   bool IsNamedPipe = false;
90   bool IsValid = false;       // Is this \c FileEntry initialized and valid?
91 
92   /// The open file, if it is owned by the \p FileEntry.
93   mutable std::unique_ptr<llvm::vfs::File> File;
94 
95 public:
96   /// Creates an invalid entry. Every scalar member has a defined initial value
97   /// so that the accessors never read indeterminate data on a fresh entry.
98   FileEntry() : UniqueID(0, 0) {}
99 
100   FileEntry(const FileEntry &) = delete;
101   FileEntry &operator=(const FileEntry &) = delete;
102 
103   StringRef getName() const { return Name; }
104   StringRef tryGetRealPathName() const { return RealPathName; }
105   bool isValid() const { return IsValid; }
106   off_t getSize() const { return Size; }
107   unsigned getUID() const { return UID; }
108   const llvm::sys::fs::UniqueID &getUniqueID() const { return UniqueID; }
109   time_t getModificationTime() const { return ModTime; }
110 
111   /// Return the directory the file lives in.
112   const DirectoryEntry *getDir() const { return Dir; }
113   /// Orders entries by their file-system unique ID.
114   bool operator<(const FileEntry &RHS) const { return UniqueID < RHS.UniqueID; }
115 
116   /// Check whether the file is a named pipe (and thus can't be opened by
117   /// the native FileManager methods).
118   bool isNamedPipe() const { return IsNamedPipe; }
119   /// Releases the owned open file, if any.
120   void closeFile() const {
121     File.reset(); // rely on destructor to close File
122   }
123 
124   // Only for use in tests to see if deferred opens are happening, rather than
125   // relying on RealPathName being empty.
126   bool isOpenForTests() const { return File != nullptr; }
127 };
128 
129 /// A reference to a \c FileEntry that includes the name of the file as it was
130 /// accessed by the FileManager's client.
131 class FileEntryRef {
132 public:
133   FileEntryRef() = delete;
134   FileEntryRef(StringRef Name, const FileEntry &Entry)
135       : Name(Name), Entry(&Entry) {}
136 
137   /// The name used to access the file. Returned as a plain (non-const) value,
138   /// matching \c FileEntry::getName and \c DirectoryEntryRef::getName.
139   StringRef getName() const { return Name; }
140   /// Forwards to \c FileEntry::isValid on the referenced entry.
141   bool isValid() const { return Entry->isValid(); }
142   const FileEntry &getFileEntry() const { return *Entry; }
143 
144   // The accessors below forward to the referenced \c FileEntry.
145   off_t getSize() const { return Entry->getSize(); }
146   unsigned getUID() const { return Entry->getUID(); }
147   const llvm::sys::fs::UniqueID &getUniqueID() const {
148     return Entry->getUniqueID();
149   }
150   time_t getModificationTime() const { return Entry->getModificationTime(); }
151 
152   /// Equal only when both the referenced entry object and the name match.
153   friend bool operator==(const FileEntryRef &LHS, const FileEntryRef &RHS) {
154     return LHS.Entry == RHS.Entry && LHS.Name == RHS.Name;
155   }
156   friend bool operator!=(const FileEntryRef &LHS, const FileEntryRef &RHS) {
157     return !(LHS == RHS);
158   }
159 
160 private:
161   StringRef Name;
162   const FileEntry *Entry;
163 };
164 
165 /// Implements support for file system lookup, file system caching,
166 /// and directory search management.
167 ///
168 /// This also handles more advanced properties, such as uniquing files based
169 /// on "inode", so that a file with two names (e.g. symlinked) will be treated
170 /// as a single file.
171 /// Instances are reference counted through the RefCountedBase base class.
172 class FileManager : public RefCountedBase<FileManager> {
173   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
174   FileSystemOptions FileSystemOpts;
175 
176   /// Cache for existing real directories, keyed by file-system unique ID.
177   std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs;
178 
179   /// Cache for existing real files, keyed by file-system unique ID.
180   std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles;
181 
182   /// The virtual directories that we have allocated.
183   ///
184   /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent
185   /// directories (foo/ and foo/bar/) here.
186   SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries;
187   /// The virtual files that we have allocated (owned by this vector).
188   SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries;
189 
190   /// A set of files that bypass the maps and uniquing.  They can have
191   /// conflicting filenames.
192   SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries;
193 
194   /// A cache that maps paths to directory entries (either real or
195   /// virtual) we have looked up, or an error that occurred when we looked up
196   /// the directory.
197   ///
198   /// The actual Entries for real directories/files are
199   /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries
200   /// for virtual directories/files are owned by
201   /// VirtualDirectoryEntries/VirtualFileEntries above.
202   ///
203   llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator>
204   SeenDirEntries;
205 
206   /// A reference to the file entry that is associated with a particular
207   /// filename, or a reference to another filename that should be looked up
208   /// instead of the accessed filename.
209   ///
210   /// The reference to another filename is specifically useful for Redirecting
211   /// VFSs that use external names. In that case, the \c FileEntryRef returned
212   /// by the \c FileManager will have the external name, and not the name that
213   /// was used to look up the file.
214   using SeenFileEntryOrRedirect =
215       llvm::PointerUnion<FileEntry *, const StringRef *>;
216 
217   /// A cache that maps paths to file entries (either real or
218   /// virtual) we have looked up, or an error that occurred when we looked up
219   /// the file.
220   ///
221   /// \see SeenDirEntries
222   llvm::StringMap<llvm::ErrorOr<SeenFileEntryOrRedirect>,
223                   llvm::BumpPtrAllocator>
224       SeenFileEntries;
225 
226   /// The canonical names of files and directories.
227   llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames;
228 
229   /// Storage for canonical names that we have computed.
230   llvm::BumpPtrAllocator CanonicalNameStorage;
231 
232   /// Each FileEntry we create is assigned a unique ID #.
233   /// NOTE(review): presumably the next ID to hand out; confirm in the .cpp.
234   unsigned NextFileUID;
235 
236   // Optional stat cache; see setStatCache() and clearStatCache().
237   std::unique_ptr<FileSystemStatCache> StatCache;
238 
239   std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status,
240                                bool isFile,
241                                std::unique_ptr<llvm::vfs::File> *F);
242 
243   /// Add all ancestors of the given path (pointing to either a file
244   /// or a directory) as virtual directories.
245   void addAncestorsAsVirtualDirs(StringRef Path);
246 
247   /// Fills the RealPathName in file entry.
248   void fillRealPathName(FileEntry *UFE, llvm::StringRef FileName);
249 
250 public:
251   /// Construct a file manager, optionally with a custom VFS.
252   ///
253   /// \param FS if non-null, the VFS to use.  Otherwise uses
254   /// llvm::vfs::getRealFileSystem().
255   FileManager(const FileSystemOptions &FileSystemOpts,
256               IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = nullptr);
257   ~FileManager();
258 
259   /// Installs the provided FileSystemStatCache object within
260   /// the FileManager.
261   ///
262   /// The FileManager takes ownership of the cache; the \c std::unique_ptr
263   /// parameter makes that transfer explicit at the call site.
264   ///
265   /// \param statCache the new stat cache to install.
266   void setStatCache(std::unique_ptr<FileSystemStatCache> statCache);
267 
268   /// Removes the FileSystemStatCache object from the manager.
269   void clearStatCache();
270 
271   /// Returns the number of unique real file entries cached by the file manager.
272   size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); }
273 
274   /// Look up, cache, and verify the specified directory (real or
275   /// virtual).
276   ///
277   /// This returns an \c llvm::Error (inside the \c llvm::Expected) if there
278   /// was an error reading the directory. On success, returns the reference to
279   /// the directory entry together with the exact path that was used to access
280   /// the directory by a particular call to getDirectoryRef.
281   ///
282   /// \param CacheFailure If true and the directory does not exist, we'll cache
283   /// the failure to find this directory.
284   llvm::Expected<DirectoryEntryRef> getDirectoryRef(StringRef DirName,
285                                                     bool CacheFailure = true);
286 
287   /// Get a \c DirectoryEntryRef if it exists, without doing anything on error.
288   llvm::Optional<DirectoryEntryRef>
289   getOptionalDirectoryRef(StringRef DirName, bool CacheFailure = true) {
290     return llvm::expectedToOptional(getDirectoryRef(DirName, CacheFailure));
291   }
292 
293   /// Look up, cache, and verify the specified directory (real or
294   /// virtual).
295   ///
296   /// This function is deprecated and will be removed at some point in the
297   /// future, new clients should use
298   ///  \c getDirectoryRef.
299   ///
300   /// This returns a \c std::error_code if there was an error reading the
301   /// directory. If there is no error, the DirectoryEntry is guaranteed to be
302   /// non-NULL.
303   ///
304   /// \param CacheFailure If true and the directory does not exist, we'll cache
305   /// the failure to find this directory.
306   llvm::ErrorOr<const DirectoryEntry *>
307   getDirectory(StringRef DirName, bool CacheFailure = true);
308 
309   /// Look up, cache, and verify the specified file (real or
310   /// virtual).
311   ///
312   /// This function is deprecated and will be removed at some point in the
313   /// future, new clients should use
314   ///  \c getFileRef.
315   ///
316   /// This returns a \c std::error_code if there was an error loading the file.
317   /// If there is no error, the FileEntry is guaranteed to be non-NULL.
318   ///
319   /// \param OpenFile if true and the file exists, it will be opened.
320   ///
321   /// \param CacheFailure If true and the file does not exist, we'll cache
322   /// the failure to find this file.
323   llvm::ErrorOr<const FileEntry *>
324   getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true);
325 
326   /// Look up, cache, and verify the specified file (real or virtual). Return
327   /// the reference to the file entry together with the exact path that was used
328   /// to access a file by a particular call to getFileRef. If the underlying VFS
329   /// is a redirecting VFS that uses external file names, the returned
330   /// FileEntryRef will use the external name instead of the filename that was
331   /// passed to this method.
332   ///
333   /// This returns an \c llvm::Error (inside the \c llvm::Expected) if there
334   /// was an error loading the file, or a \c FileEntryRef otherwise.
335   ///
336   /// \param OpenFile if true and the file exists, it will be opened.
337   ///
338   /// \param CacheFailure If true and the file does not exist, we'll cache
339   /// the failure to find this file.
340   llvm::Expected<FileEntryRef> getFileRef(StringRef Filename,
341                                           bool OpenFile = false,
342                                           bool CacheFailure = true);
343 
344   /// Get a FileEntryRef if it exists, without doing anything on error.
345   llvm::Optional<FileEntryRef> getOptionalFileRef(StringRef Filename,
346                                                   bool OpenFile = false,
347                                                   bool CacheFailure = true) {
348     return llvm::expectedToOptional(
349         getFileRef(Filename, OpenFile, CacheFailure));
350   }
351 
352   /// Returns the current file system options.
353   FileSystemOptions &getFileSystemOpts() { return FileSystemOpts; }
354   const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; }
355 
356   llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
357 
358   void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
359     this->FS = std::move(FS);
360   }
361 
362   /// Retrieve a file entry for a "virtual" file that acts as
363   /// if there were a file with the given name on disk.
364   ///
365   /// The file itself is not accessed.
366   const FileEntry *getVirtualFile(StringRef Filename, off_t Size,
367                                   time_t ModificationTime);
368 
369   /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual
370   /// file entry, to access the real file.  The returned FileEntry will have
371   /// the same filename as VFE but a different identity and its own stat.
372   ///
373   /// This should be used only for rare error recovery paths because it
374   /// bypasses all mapping and uniquing, blindly creating a new FileEntry.
375   /// There is no attempt to deduplicate these; if you bypass the same file
376   /// twice, you get two new file entries.
377   llvm::Optional<FileEntryRef> getBypassFile(FileEntryRef VFE);
378 
379   /// Open the specified file as a MemoryBuffer, returning a new
380   /// MemoryBuffer if successful, otherwise returning an error code.
381   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
382   getBufferForFile(const FileEntry *Entry, bool isVolatile = false,
383                    bool RequiresNullTerminator = true);
384   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
385   getBufferForFile(StringRef Filename, bool isVolatile = false,
386                    bool RequiresNullTerminator = true) {
387     return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
388                                 RequiresNullTerminator);
389   }
390 
391 private:
392   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
393   getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile,
394                        bool RequiresNullTerminator);
395 
396 public:
397   /// Get the 'stat' information for the given \p Path.
398   ///
399   /// If the path is relative, it will be resolved against the WorkingDir of the
400   /// FileManager's FileSystemOptions.
401   ///
402   /// \returns a \c std::error_code describing an error, if there was one.
403   std::error_code getNoncachedStatValue(StringRef Path,
404                                         llvm::vfs::Status &Result);
405 
406   /// If path is not absolute and FileSystemOptions set the working
407   /// directory, the path is modified to be relative to the given
408   /// working directory.
409   /// \returns true if \c path changed.
410   bool FixupRelativePath(SmallVectorImpl<char> &path) const;
411 
412   /// Makes \c Path absolute taking into account FileSystemOptions and the
413   /// working directory option.
414   /// \returns true if \c Path changed to absolute.
415   bool makeAbsolutePath(SmallVectorImpl<char> &Path) const;
416 
417   /// Produce an array mapping from the unique IDs assigned to each
418   /// file to the corresponding FileEntry pointer.
419   void GetUniqueIDMapping(
420                     SmallVectorImpl<const FileEntry *> &UIDToFiles) const;
421 
422   /// Retrieve the canonical name for a given directory.
423   ///
424   /// This is a very expensive operation, despite its results being cached,
425   /// and should only be used when the physical layout of the file system is
426   /// required, which is (almost) never.
427   StringRef getCanonicalName(const DirectoryEntry *Dir);
428 
429   /// Retrieve the canonical name for a given file.
430   ///
431   /// This is a very expensive operation, despite its results being cached,
432   /// and should only be used when the physical layout of the file system is
433   /// required, which is (almost) never.
434   StringRef getCanonicalName(const FileEntry *File);
435 
436   void PrintStats() const;
437 };
438 
439 } // end namespace clang
440 
441 #endif // LLVM_CLANG_BASIC_FILEMANAGER_H
442