106f32e7eSjoerg //===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===//
206f32e7eSjoerg //
306f32e7eSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406f32e7eSjoerg // See https://llvm.org/LICENSE.txt for license information.
506f32e7eSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606f32e7eSjoerg //
706f32e7eSjoerg //===----------------------------------------------------------------------===//
806f32e7eSjoerg ///
906f32e7eSjoerg /// \file
1006f32e7eSjoerg /// Defines the clang::FileManager interface and associated types.
1106f32e7eSjoerg ///
1206f32e7eSjoerg //===----------------------------------------------------------------------===//
1306f32e7eSjoerg 
1406f32e7eSjoerg #ifndef LLVM_CLANG_BASIC_FILEMANAGER_H
1506f32e7eSjoerg #define LLVM_CLANG_BASIC_FILEMANAGER_H
1606f32e7eSjoerg 
17*13fbcb42Sjoerg #include "clang/Basic/DirectoryEntry.h"
18*13fbcb42Sjoerg #include "clang/Basic/FileEntry.h"
1906f32e7eSjoerg #include "clang/Basic/FileSystemOptions.h"
2006f32e7eSjoerg #include "clang/Basic/LLVM.h"
2106f32e7eSjoerg #include "llvm/ADT/DenseMap.h"
2206f32e7eSjoerg #include "llvm/ADT/IntrusiveRefCntPtr.h"
23*13fbcb42Sjoerg #include "llvm/ADT/PointerUnion.h"
2406f32e7eSjoerg #include "llvm/ADT/SmallVector.h"
2506f32e7eSjoerg #include "llvm/ADT/StringMap.h"
2606f32e7eSjoerg #include "llvm/ADT/StringRef.h"
2706f32e7eSjoerg #include "llvm/Support/Allocator.h"
2806f32e7eSjoerg #include "llvm/Support/ErrorOr.h"
2906f32e7eSjoerg #include "llvm/Support/FileSystem.h"
3006f32e7eSjoerg #include "llvm/Support/VirtualFileSystem.h"
3106f32e7eSjoerg #include <ctime>
3206f32e7eSjoerg #include <map>
3306f32e7eSjoerg #include <memory>
3406f32e7eSjoerg #include <string>
3506f32e7eSjoerg 
3606f32e7eSjoerg namespace llvm {
3706f32e7eSjoerg 
3806f32e7eSjoerg class MemoryBuffer;
3906f32e7eSjoerg 
4006f32e7eSjoerg } // end namespace llvm
4106f32e7eSjoerg 
4206f32e7eSjoerg namespace clang {
4306f32e7eSjoerg 
4406f32e7eSjoerg class FileSystemStatCache;
4506f32e7eSjoerg 
4606f32e7eSjoerg /// Implements support for file system lookup, file system caching,
4706f32e7eSjoerg /// and directory search management.
4806f32e7eSjoerg ///
4906f32e7eSjoerg /// This also handles more advanced properties, such as uniquing files based
5006f32e7eSjoerg /// on "inode", so that a file with two names (e.g. symlinked) will be treated
5106f32e7eSjoerg /// as a single file.
5206f32e7eSjoerg ///
5306f32e7eSjoerg class FileManager : public RefCountedBase<FileManager> {
5406f32e7eSjoerg   IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
5506f32e7eSjoerg   FileSystemOptions FileSystemOpts;
5606f32e7eSjoerg 
5706f32e7eSjoerg   /// Cache for existing real directories.
5806f32e7eSjoerg   std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs;
5906f32e7eSjoerg 
6006f32e7eSjoerg   /// Cache for existing real files.
6106f32e7eSjoerg   std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles;
6206f32e7eSjoerg 
6306f32e7eSjoerg   /// The virtual directories that we have allocated.
6406f32e7eSjoerg   ///
6506f32e7eSjoerg   /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent
6606f32e7eSjoerg   /// directories (foo/ and foo/bar/) here.
6706f32e7eSjoerg   SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries;
6806f32e7eSjoerg   /// The virtual files that we have allocated.
6906f32e7eSjoerg   SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries;
7006f32e7eSjoerg 
7106f32e7eSjoerg   /// A set of files that bypass the maps and uniquing.  They can have
7206f32e7eSjoerg   /// conflicting filenames.
7306f32e7eSjoerg   SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries;
7406f32e7eSjoerg 
7506f32e7eSjoerg   /// A cache that maps paths to directory entries (either real or
7606f32e7eSjoerg   /// virtual) we have looked up, or an error that occurred when we looked up
7706f32e7eSjoerg   /// the directory.
7806f32e7eSjoerg   ///
7906f32e7eSjoerg   /// The actual Entries for real directories/files are
8006f32e7eSjoerg   /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries
8106f32e7eSjoerg   /// for virtual directories/files are owned by
8206f32e7eSjoerg   /// VirtualDirectoryEntries/VirtualFileEntries above.
8306f32e7eSjoerg   ///
8406f32e7eSjoerg   llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator>
8506f32e7eSjoerg   SeenDirEntries;
8606f32e7eSjoerg 
8706f32e7eSjoerg   /// A cache that maps paths to file entries (either real or
8806f32e7eSjoerg   /// virtual) we have looked up, or an error that occurred when we looked up
8906f32e7eSjoerg   /// the file.
9006f32e7eSjoerg   ///
9106f32e7eSjoerg   /// \see SeenDirEntries
92*13fbcb42Sjoerg   llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, llvm::BumpPtrAllocator>
9306f32e7eSjoerg       SeenFileEntries;
9406f32e7eSjoerg 
95*13fbcb42Sjoerg   /// A mirror of SeenFileEntries to give fake answers for getBypassFile().
96*13fbcb42Sjoerg   ///
97*13fbcb42Sjoerg   /// Don't bother hooking up a BumpPtrAllocator. This should be rarely used,
98*13fbcb42Sjoerg   /// and only on error paths.
99*13fbcb42Sjoerg   std::unique_ptr<llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>>
100*13fbcb42Sjoerg       SeenBypassFileEntries;
101*13fbcb42Sjoerg 
102*13fbcb42Sjoerg   /// The file entry for stdin, if it has been accessed through the FileManager.
103*13fbcb42Sjoerg   Optional<FileEntryRef> STDIN;
104*13fbcb42Sjoerg 
105*13fbcb42Sjoerg   /// The canonical names of files and directories .
106*13fbcb42Sjoerg   llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames;
10706f32e7eSjoerg 
10806f32e7eSjoerg   /// Storage for canonical names that we have computed.
10906f32e7eSjoerg   llvm::BumpPtrAllocator CanonicalNameStorage;
11006f32e7eSjoerg 
11106f32e7eSjoerg   /// Each FileEntry we create is assigned a unique ID #.
11206f32e7eSjoerg   ///
11306f32e7eSjoerg   unsigned NextFileUID;
11406f32e7eSjoerg 
11506f32e7eSjoerg   // Caching.
11606f32e7eSjoerg   std::unique_ptr<FileSystemStatCache> StatCache;
11706f32e7eSjoerg 
11806f32e7eSjoerg   std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status,
11906f32e7eSjoerg                                bool isFile,
12006f32e7eSjoerg                                std::unique_ptr<llvm::vfs::File> *F);
12106f32e7eSjoerg 
12206f32e7eSjoerg   /// Add all ancestors of the given path (pointing to either a file
12306f32e7eSjoerg   /// or a directory) as virtual directories.
12406f32e7eSjoerg   void addAncestorsAsVirtualDirs(StringRef Path);
12506f32e7eSjoerg 
12606f32e7eSjoerg   /// Fills the RealPathName in file entry.
12706f32e7eSjoerg   void fillRealPathName(FileEntry *UFE, llvm::StringRef FileName);
12806f32e7eSjoerg 
12906f32e7eSjoerg public:
13006f32e7eSjoerg   /// Construct a file manager, optionally with a custom VFS.
13106f32e7eSjoerg   ///
13206f32e7eSjoerg   /// \param FS if non-null, the VFS to use.  Otherwise uses
13306f32e7eSjoerg   /// llvm::vfs::getRealFileSystem().
13406f32e7eSjoerg   FileManager(const FileSystemOptions &FileSystemOpts,
13506f32e7eSjoerg               IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = nullptr);
13606f32e7eSjoerg   ~FileManager();
13706f32e7eSjoerg 
13806f32e7eSjoerg   /// Installs the provided FileSystemStatCache object within
13906f32e7eSjoerg   /// the FileManager.
14006f32e7eSjoerg   ///
14106f32e7eSjoerg   /// Ownership of this object is transferred to the FileManager.
14206f32e7eSjoerg   ///
14306f32e7eSjoerg   /// \param statCache the new stat cache to install. Ownership of this
14406f32e7eSjoerg   /// object is transferred to the FileManager.
14506f32e7eSjoerg   void setStatCache(std::unique_ptr<FileSystemStatCache> statCache);
14606f32e7eSjoerg 
14706f32e7eSjoerg   /// Removes the FileSystemStatCache object from the manager.
14806f32e7eSjoerg   void clearStatCache();
14906f32e7eSjoerg 
15006f32e7eSjoerg   /// Returns the number of unique real file entries cached by the file manager.
getNumUniqueRealFiles()15106f32e7eSjoerg   size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); }
15206f32e7eSjoerg 
15306f32e7eSjoerg   /// Lookup, cache, and verify the specified directory (real or
15406f32e7eSjoerg   /// virtual).
15506f32e7eSjoerg   ///
15606f32e7eSjoerg   /// This returns a \c std::error_code if there was an error reading the
15706f32e7eSjoerg   /// directory. On success, returns the reference to the directory entry
15806f32e7eSjoerg   /// together with the exact path that was used to access a file by a
15906f32e7eSjoerg   /// particular call to getDirectoryRef.
16006f32e7eSjoerg   ///
16106f32e7eSjoerg   /// \param CacheFailure If true and the file does not exist, we'll cache
16206f32e7eSjoerg   /// the failure to find this file.
16306f32e7eSjoerg   llvm::Expected<DirectoryEntryRef> getDirectoryRef(StringRef DirName,
16406f32e7eSjoerg                                                     bool CacheFailure = true);
16506f32e7eSjoerg 
16606f32e7eSjoerg   /// Get a \c DirectoryEntryRef if it exists, without doing anything on error.
16706f32e7eSjoerg   llvm::Optional<DirectoryEntryRef>
16806f32e7eSjoerg   getOptionalDirectoryRef(StringRef DirName, bool CacheFailure = true) {
16906f32e7eSjoerg     return llvm::expectedToOptional(getDirectoryRef(DirName, CacheFailure));
17006f32e7eSjoerg   }
17106f32e7eSjoerg 
17206f32e7eSjoerg   /// Lookup, cache, and verify the specified directory (real or
17306f32e7eSjoerg   /// virtual).
17406f32e7eSjoerg   ///
17506f32e7eSjoerg   /// This function is deprecated and will be removed at some point in the
17606f32e7eSjoerg   /// future, new clients should use
17706f32e7eSjoerg   ///  \c getDirectoryRef.
17806f32e7eSjoerg   ///
17906f32e7eSjoerg   /// This returns a \c std::error_code if there was an error reading the
18006f32e7eSjoerg   /// directory. If there is no error, the DirectoryEntry is guaranteed to be
18106f32e7eSjoerg   /// non-NULL.
18206f32e7eSjoerg   ///
18306f32e7eSjoerg   /// \param CacheFailure If true and the file does not exist, we'll cache
18406f32e7eSjoerg   /// the failure to find this file.
18506f32e7eSjoerg   llvm::ErrorOr<const DirectoryEntry *>
18606f32e7eSjoerg   getDirectory(StringRef DirName, bool CacheFailure = true);
18706f32e7eSjoerg 
18806f32e7eSjoerg   /// Lookup, cache, and verify the specified file (real or
18906f32e7eSjoerg   /// virtual).
19006f32e7eSjoerg   ///
19106f32e7eSjoerg   /// This function is deprecated and will be removed at some point in the
19206f32e7eSjoerg   /// future, new clients should use
19306f32e7eSjoerg   ///  \c getFileRef.
19406f32e7eSjoerg   ///
19506f32e7eSjoerg   /// This returns a \c std::error_code if there was an error loading the file.
19606f32e7eSjoerg   /// If there is no error, the FileEntry is guaranteed to be non-NULL.
19706f32e7eSjoerg   ///
19806f32e7eSjoerg   /// \param OpenFile if true and the file exists, it will be opened.
19906f32e7eSjoerg   ///
20006f32e7eSjoerg   /// \param CacheFailure If true and the file does not exist, we'll cache
20106f32e7eSjoerg   /// the failure to find this file.
20206f32e7eSjoerg   llvm::ErrorOr<const FileEntry *>
20306f32e7eSjoerg   getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true);
20406f32e7eSjoerg 
20506f32e7eSjoerg   /// Lookup, cache, and verify the specified file (real or virtual). Return the
20606f32e7eSjoerg   /// reference to the file entry together with the exact path that was used to
20706f32e7eSjoerg   /// access a file by a particular call to getFileRef. If the underlying VFS is
20806f32e7eSjoerg   /// a redirecting VFS that uses external file names, the returned FileEntryRef
20906f32e7eSjoerg   /// will use the external name instead of the filename that was passed to this
21006f32e7eSjoerg   /// method.
21106f32e7eSjoerg   ///
21206f32e7eSjoerg   /// This returns a \c std::error_code if there was an error loading the file,
21306f32e7eSjoerg   /// or a \c FileEntryRef otherwise.
21406f32e7eSjoerg   ///
21506f32e7eSjoerg   /// \param OpenFile if true and the file exists, it will be opened.
21606f32e7eSjoerg   ///
21706f32e7eSjoerg   /// \param CacheFailure If true and the file does not exist, we'll cache
21806f32e7eSjoerg   /// the failure to find this file.
21906f32e7eSjoerg   llvm::Expected<FileEntryRef> getFileRef(StringRef Filename,
22006f32e7eSjoerg                                           bool OpenFile = false,
22106f32e7eSjoerg                                           bool CacheFailure = true);
22206f32e7eSjoerg 
223*13fbcb42Sjoerg   /// Get the FileEntryRef for stdin, returning an error if stdin cannot be
224*13fbcb42Sjoerg   /// read.
225*13fbcb42Sjoerg   ///
226*13fbcb42Sjoerg   /// This reads and caches stdin before returning. Subsequent calls return the
227*13fbcb42Sjoerg   /// same file entry, and a reference to the cached input is returned by calls
228*13fbcb42Sjoerg   /// to getBufferForFile.
229*13fbcb42Sjoerg   llvm::Expected<FileEntryRef> getSTDIN();
230*13fbcb42Sjoerg 
23106f32e7eSjoerg   /// Get a FileEntryRef if it exists, without doing anything on error.
23206f32e7eSjoerg   llvm::Optional<FileEntryRef> getOptionalFileRef(StringRef Filename,
23306f32e7eSjoerg                                                   bool OpenFile = false,
23406f32e7eSjoerg                                                   bool CacheFailure = true) {
23506f32e7eSjoerg     return llvm::expectedToOptional(
23606f32e7eSjoerg         getFileRef(Filename, OpenFile, CacheFailure));
23706f32e7eSjoerg   }
23806f32e7eSjoerg 
23906f32e7eSjoerg   /// Returns the current file system options
getFileSystemOpts()24006f32e7eSjoerg   FileSystemOptions &getFileSystemOpts() { return FileSystemOpts; }
getFileSystemOpts()24106f32e7eSjoerg   const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; }
24206f32e7eSjoerg 
getVirtualFileSystem()24306f32e7eSjoerg   llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
24406f32e7eSjoerg 
setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)24506f32e7eSjoerg   void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
24606f32e7eSjoerg     this->FS = std::move(FS);
24706f32e7eSjoerg   }
24806f32e7eSjoerg 
24906f32e7eSjoerg   /// Retrieve a file entry for a "virtual" file that acts as
25006f32e7eSjoerg   /// if there were a file with the given name on disk.
25106f32e7eSjoerg   ///
25206f32e7eSjoerg   /// The file itself is not accessed.
253*13fbcb42Sjoerg   FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size,
254*13fbcb42Sjoerg                                  time_t ModificationTime);
255*13fbcb42Sjoerg 
25606f32e7eSjoerg   const FileEntry *getVirtualFile(StringRef Filename, off_t Size,
25706f32e7eSjoerg                                   time_t ModificationTime);
25806f32e7eSjoerg 
25906f32e7eSjoerg   /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual
26006f32e7eSjoerg   /// file entry, to access the real file.  The returned FileEntry will have
26106f32e7eSjoerg   /// the same filename as FE but a different identity and its own stat.
26206f32e7eSjoerg   ///
26306f32e7eSjoerg   /// This should be used only for rare error recovery paths because it
26406f32e7eSjoerg   /// bypasses all mapping and uniquing, blindly creating a new FileEntry.
26506f32e7eSjoerg   /// There is no attempt to deduplicate these; if you bypass the same file
26606f32e7eSjoerg   /// twice, you get two new file entries.
26706f32e7eSjoerg   llvm::Optional<FileEntryRef> getBypassFile(FileEntryRef VFE);
26806f32e7eSjoerg 
26906f32e7eSjoerg   /// Open the specified file as a MemoryBuffer, returning a new
27006f32e7eSjoerg   /// MemoryBuffer if successful, otherwise returning null.
27106f32e7eSjoerg   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
272*13fbcb42Sjoerg   getBufferForFile(const FileEntry *Entry, bool isVolatile = false,
273*13fbcb42Sjoerg                    bool RequiresNullTerminator = true);
27406f32e7eSjoerg   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
275*13fbcb42Sjoerg   getBufferForFile(StringRef Filename, bool isVolatile = false,
276*13fbcb42Sjoerg                    bool RequiresNullTerminator = true) {
277*13fbcb42Sjoerg     return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile,
278*13fbcb42Sjoerg                                 RequiresNullTerminator);
27906f32e7eSjoerg   }
28006f32e7eSjoerg 
28106f32e7eSjoerg private:
28206f32e7eSjoerg   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
283*13fbcb42Sjoerg   getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile,
284*13fbcb42Sjoerg                        bool RequiresNullTerminator);
28506f32e7eSjoerg 
28606f32e7eSjoerg public:
28706f32e7eSjoerg   /// Get the 'stat' information for the given \p Path.
28806f32e7eSjoerg   ///
28906f32e7eSjoerg   /// If the path is relative, it will be resolved against the WorkingDir of the
29006f32e7eSjoerg   /// FileManager's FileSystemOptions.
29106f32e7eSjoerg   ///
29206f32e7eSjoerg   /// \returns a \c std::error_code describing an error, if there was one
29306f32e7eSjoerg   std::error_code getNoncachedStatValue(StringRef Path,
29406f32e7eSjoerg                                         llvm::vfs::Status &Result);
29506f32e7eSjoerg 
29606f32e7eSjoerg   /// If path is not absolute and FileSystemOptions set the working
29706f32e7eSjoerg   /// directory, the path is modified to be relative to the given
29806f32e7eSjoerg   /// working directory.
29906f32e7eSjoerg   /// \returns true if \c path changed.
30006f32e7eSjoerg   bool FixupRelativePath(SmallVectorImpl<char> &path) const;
30106f32e7eSjoerg 
30206f32e7eSjoerg   /// Makes \c Path absolute taking into account FileSystemOptions and the
30306f32e7eSjoerg   /// working directory option.
30406f32e7eSjoerg   /// \returns true if \c Path changed to absolute.
30506f32e7eSjoerg   bool makeAbsolutePath(SmallVectorImpl<char> &Path) const;
30606f32e7eSjoerg 
30706f32e7eSjoerg   /// Produce an array mapping from the unique IDs assigned to each
30806f32e7eSjoerg   /// file to the corresponding FileEntry pointer.
30906f32e7eSjoerg   void GetUniqueIDMapping(
31006f32e7eSjoerg                     SmallVectorImpl<const FileEntry *> &UIDToFiles) const;
31106f32e7eSjoerg 
31206f32e7eSjoerg   /// Retrieve the canonical name for a given directory.
31306f32e7eSjoerg   ///
31406f32e7eSjoerg   /// This is a very expensive operation, despite its results being cached,
31506f32e7eSjoerg   /// and should only be used when the physical layout of the file system is
31606f32e7eSjoerg   /// required, which is (almost) never.
31706f32e7eSjoerg   StringRef getCanonicalName(const DirectoryEntry *Dir);
31806f32e7eSjoerg 
319*13fbcb42Sjoerg   /// Retrieve the canonical name for a given file.
320*13fbcb42Sjoerg   ///
321*13fbcb42Sjoerg   /// This is a very expensive operation, despite its results being cached,
322*13fbcb42Sjoerg   /// and should only be used when the physical layout of the file system is
323*13fbcb42Sjoerg   /// required, which is (almost) never.
324*13fbcb42Sjoerg   StringRef getCanonicalName(const FileEntry *File);
325*13fbcb42Sjoerg 
32606f32e7eSjoerg   void PrintStats() const;
32706f32e7eSjoerg };
32806f32e7eSjoerg 
32906f32e7eSjoerg } // end namespace clang
33006f32e7eSjoerg 
33106f32e7eSjoerg #endif // LLVM_CLANG_BASIC_FILEMANAGER_H
332