106f32e7eSjoerg //===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===// 206f32e7eSjoerg // 306f32e7eSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406f32e7eSjoerg // See https://llvm.org/LICENSE.txt for license information. 506f32e7eSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606f32e7eSjoerg // 706f32e7eSjoerg //===----------------------------------------------------------------------===// 806f32e7eSjoerg /// 906f32e7eSjoerg /// \file 1006f32e7eSjoerg /// Defines the clang::FileManager interface and associated types. 1106f32e7eSjoerg /// 1206f32e7eSjoerg //===----------------------------------------------------------------------===// 1306f32e7eSjoerg 1406f32e7eSjoerg #ifndef LLVM_CLANG_BASIC_FILEMANAGER_H 1506f32e7eSjoerg #define LLVM_CLANG_BASIC_FILEMANAGER_H 1606f32e7eSjoerg 17*13fbcb42Sjoerg #include "clang/Basic/DirectoryEntry.h" 18*13fbcb42Sjoerg #include "clang/Basic/FileEntry.h" 1906f32e7eSjoerg #include "clang/Basic/FileSystemOptions.h" 2006f32e7eSjoerg #include "clang/Basic/LLVM.h" 2106f32e7eSjoerg #include "llvm/ADT/DenseMap.h" 2206f32e7eSjoerg #include "llvm/ADT/IntrusiveRefCntPtr.h" 23*13fbcb42Sjoerg #include "llvm/ADT/PointerUnion.h" 2406f32e7eSjoerg #include "llvm/ADT/SmallVector.h" 2506f32e7eSjoerg #include "llvm/ADT/StringMap.h" 2606f32e7eSjoerg #include "llvm/ADT/StringRef.h" 2706f32e7eSjoerg #include "llvm/Support/Allocator.h" 2806f32e7eSjoerg #include "llvm/Support/ErrorOr.h" 2906f32e7eSjoerg #include "llvm/Support/FileSystem.h" 3006f32e7eSjoerg #include "llvm/Support/VirtualFileSystem.h" 3106f32e7eSjoerg #include <ctime> 3206f32e7eSjoerg #include <map> 3306f32e7eSjoerg #include <memory> 3406f32e7eSjoerg #include <string> 3506f32e7eSjoerg 3606f32e7eSjoerg namespace llvm { 3706f32e7eSjoerg 3806f32e7eSjoerg class MemoryBuffer; 3906f32e7eSjoerg 4006f32e7eSjoerg } // end namespace llvm 4106f32e7eSjoerg 4206f32e7eSjoerg namespace clang { 4306f32e7eSjoerg 4406f32e7eSjoerg class FileSystemStatCache; 4506f32e7eSjoerg 4606f32e7eSjoerg /// Implements support for file system lookup, file system caching, 4706f32e7eSjoerg /// and directory search management. 4806f32e7eSjoerg /// 4906f32e7eSjoerg /// This also handles more advanced properties, such as uniquing files based 5006f32e7eSjoerg /// on "inode", so that a file with two names (e.g. symlinked) will be treated 5106f32e7eSjoerg /// as a single file. 5206f32e7eSjoerg /// 5306f32e7eSjoerg class FileManager : public RefCountedBase<FileManager> { 5406f32e7eSjoerg IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; 5506f32e7eSjoerg FileSystemOptions FileSystemOpts; 5606f32e7eSjoerg 5706f32e7eSjoerg /// Cache for existing real directories. 5806f32e7eSjoerg std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs; 5906f32e7eSjoerg 6006f32e7eSjoerg /// Cache for existing real files. 6106f32e7eSjoerg std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles; 6206f32e7eSjoerg 6306f32e7eSjoerg /// The virtual directories that we have allocated. 6406f32e7eSjoerg /// 6506f32e7eSjoerg /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent 6606f32e7eSjoerg /// directories (foo/ and foo/bar/) here. 6706f32e7eSjoerg SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries; 6806f32e7eSjoerg /// The virtual files that we have allocated. 6906f32e7eSjoerg SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries; 7006f32e7eSjoerg 7106f32e7eSjoerg /// A set of files that bypass the maps and uniquing. They can have 7206f32e7eSjoerg /// conflicting filenames. 7306f32e7eSjoerg SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries; 7406f32e7eSjoerg 7506f32e7eSjoerg /// A cache that maps paths to directory entries (either real or 7606f32e7eSjoerg /// virtual) we have looked up, or an error that occurred when we looked up 7706f32e7eSjoerg /// the directory. 7806f32e7eSjoerg /// 7906f32e7eSjoerg /// The actual Entries for real directories/files are 8006f32e7eSjoerg /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries 8106f32e7eSjoerg /// for virtual directories/files are owned by 8206f32e7eSjoerg /// VirtualDirectoryEntries/VirtualFileEntries above. 8306f32e7eSjoerg /// 8406f32e7eSjoerg llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator> 8506f32e7eSjoerg SeenDirEntries; 8606f32e7eSjoerg 8706f32e7eSjoerg /// A cache that maps paths to file entries (either real or 8806f32e7eSjoerg /// virtual) we have looked up, or an error that occurred when we looked up 8906f32e7eSjoerg /// the file. 9006f32e7eSjoerg /// 9106f32e7eSjoerg /// \see SeenDirEntries 92*13fbcb42Sjoerg llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, llvm::BumpPtrAllocator> 9306f32e7eSjoerg SeenFileEntries; 9406f32e7eSjoerg 95*13fbcb42Sjoerg /// A mirror of SeenFileEntries to give fake answers for getBypassFile(). 96*13fbcb42Sjoerg /// 97*13fbcb42Sjoerg /// Don't bother hooking up a BumpPtrAllocator. This should be rarely used, 98*13fbcb42Sjoerg /// and only on error paths. 99*13fbcb42Sjoerg std::unique_ptr<llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>> 100*13fbcb42Sjoerg SeenBypassFileEntries; 101*13fbcb42Sjoerg 102*13fbcb42Sjoerg /// The file entry for stdin, if it has been accessed through the FileManager. 103*13fbcb42Sjoerg Optional<FileEntryRef> STDIN; 104*13fbcb42Sjoerg 105*13fbcb42Sjoerg /// The canonical names of files and directories . 106*13fbcb42Sjoerg llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames; 10706f32e7eSjoerg 10806f32e7eSjoerg /// Storage for canonical names that we have computed. 10906f32e7eSjoerg llvm::BumpPtrAllocator CanonicalNameStorage; 11006f32e7eSjoerg 11106f32e7eSjoerg /// Each FileEntry we create is assigned a unique ID #. 11206f32e7eSjoerg /// 11306f32e7eSjoerg unsigned NextFileUID; 11406f32e7eSjoerg 11506f32e7eSjoerg // Caching. 11606f32e7eSjoerg std::unique_ptr<FileSystemStatCache> StatCache; 11706f32e7eSjoerg 11806f32e7eSjoerg std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status, 11906f32e7eSjoerg bool isFile, 12006f32e7eSjoerg std::unique_ptr<llvm::vfs::File> *F); 12106f32e7eSjoerg 12206f32e7eSjoerg /// Add all ancestors of the given path (pointing to either a file 12306f32e7eSjoerg /// or a directory) as virtual directories. 12406f32e7eSjoerg void addAncestorsAsVirtualDirs(StringRef Path); 12506f32e7eSjoerg 12606f32e7eSjoerg /// Fills the RealPathName in file entry. 12706f32e7eSjoerg void fillRealPathName(FileEntry *UFE, llvm::StringRef FileName); 12806f32e7eSjoerg 12906f32e7eSjoerg public: 13006f32e7eSjoerg /// Construct a file manager, optionally with a custom VFS. 13106f32e7eSjoerg /// 13206f32e7eSjoerg /// \param FS if non-null, the VFS to use. Otherwise uses 13306f32e7eSjoerg /// llvm::vfs::getRealFileSystem(). 13406f32e7eSjoerg FileManager(const FileSystemOptions &FileSystemOpts, 13506f32e7eSjoerg IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = nullptr); 13606f32e7eSjoerg ~FileManager(); 13706f32e7eSjoerg 13806f32e7eSjoerg /// Installs the provided FileSystemStatCache object within 13906f32e7eSjoerg /// the FileManager. 14006f32e7eSjoerg /// 14106f32e7eSjoerg /// Ownership of this object is transferred to the FileManager. 14206f32e7eSjoerg /// 14306f32e7eSjoerg /// \param statCache the new stat cache to install. Ownership of this 14406f32e7eSjoerg /// object is transferred to the FileManager. 14506f32e7eSjoerg void setStatCache(std::unique_ptr<FileSystemStatCache> statCache); 14606f32e7eSjoerg 14706f32e7eSjoerg /// Removes the FileSystemStatCache object from the manager. 14806f32e7eSjoerg void clearStatCache(); 14906f32e7eSjoerg 15006f32e7eSjoerg /// Returns the number of unique real file entries cached by the file manager. getNumUniqueRealFiles()15106f32e7eSjoerg size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); } 15206f32e7eSjoerg 15306f32e7eSjoerg /// Lookup, cache, and verify the specified directory (real or 15406f32e7eSjoerg /// virtual). 15506f32e7eSjoerg /// 15606f32e7eSjoerg /// This returns a \c std::error_code if there was an error reading the 15706f32e7eSjoerg /// directory. On success, returns the reference to the directory entry 15806f32e7eSjoerg /// together with the exact path that was used to access a file by a 15906f32e7eSjoerg /// particular call to getDirectoryRef. 16006f32e7eSjoerg /// 16106f32e7eSjoerg /// \param CacheFailure If true and the file does not exist, we'll cache 16206f32e7eSjoerg /// the failure to find this file. 16306f32e7eSjoerg llvm::Expected<DirectoryEntryRef> getDirectoryRef(StringRef DirName, 16406f32e7eSjoerg bool CacheFailure = true); 16506f32e7eSjoerg 16606f32e7eSjoerg /// Get a \c DirectoryEntryRef if it exists, without doing anything on error. 16706f32e7eSjoerg llvm::Optional<DirectoryEntryRef> 16806f32e7eSjoerg getOptionalDirectoryRef(StringRef DirName, bool CacheFailure = true) { 16906f32e7eSjoerg return llvm::expectedToOptional(getDirectoryRef(DirName, CacheFailure)); 17006f32e7eSjoerg } 17106f32e7eSjoerg 17206f32e7eSjoerg /// Lookup, cache, and verify the specified directory (real or 17306f32e7eSjoerg /// virtual). 17406f32e7eSjoerg /// 17506f32e7eSjoerg /// This function is deprecated and will be removed at some point in the 17606f32e7eSjoerg /// future, new clients should use 17706f32e7eSjoerg /// \c getDirectoryRef. 17806f32e7eSjoerg /// 17906f32e7eSjoerg /// This returns a \c std::error_code if there was an error reading the 18006f32e7eSjoerg /// directory. If there is no error, the DirectoryEntry is guaranteed to be 18106f32e7eSjoerg /// non-NULL. 18206f32e7eSjoerg /// 18306f32e7eSjoerg /// \param CacheFailure If true and the file does not exist, we'll cache 18406f32e7eSjoerg /// the failure to find this file. 18506f32e7eSjoerg llvm::ErrorOr<const DirectoryEntry *> 18606f32e7eSjoerg getDirectory(StringRef DirName, bool CacheFailure = true); 18706f32e7eSjoerg 18806f32e7eSjoerg /// Lookup, cache, and verify the specified file (real or 18906f32e7eSjoerg /// virtual). 19006f32e7eSjoerg /// 19106f32e7eSjoerg /// This function is deprecated and will be removed at some point in the 19206f32e7eSjoerg /// future, new clients should use 19306f32e7eSjoerg /// \c getFileRef. 19406f32e7eSjoerg /// 19506f32e7eSjoerg /// This returns a \c std::error_code if there was an error loading the file. 19606f32e7eSjoerg /// If there is no error, the FileEntry is guaranteed to be non-NULL. 19706f32e7eSjoerg /// 19806f32e7eSjoerg /// \param OpenFile if true and the file exists, it will be opened. 19906f32e7eSjoerg /// 20006f32e7eSjoerg /// \param CacheFailure If true and the file does not exist, we'll cache 20106f32e7eSjoerg /// the failure to find this file. 20206f32e7eSjoerg llvm::ErrorOr<const FileEntry *> 20306f32e7eSjoerg getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true); 20406f32e7eSjoerg 20506f32e7eSjoerg /// Lookup, cache, and verify the specified file (real or virtual). Return the 20606f32e7eSjoerg /// reference to the file entry together with the exact path that was used to 20706f32e7eSjoerg /// access a file by a particular call to getFileRef. If the underlying VFS is 20806f32e7eSjoerg /// a redirecting VFS that uses external file names, the returned FileEntryRef 20906f32e7eSjoerg /// will use the external name instead of the filename that was passed to this 21006f32e7eSjoerg /// method. 21106f32e7eSjoerg /// 21206f32e7eSjoerg /// This returns a \c std::error_code if there was an error loading the file, 21306f32e7eSjoerg /// or a \c FileEntryRef otherwise. 21406f32e7eSjoerg /// 21506f32e7eSjoerg /// \param OpenFile if true and the file exists, it will be opened. 21606f32e7eSjoerg /// 21706f32e7eSjoerg /// \param CacheFailure If true and the file does not exist, we'll cache 21806f32e7eSjoerg /// the failure to find this file. 21906f32e7eSjoerg llvm::Expected<FileEntryRef> getFileRef(StringRef Filename, 22006f32e7eSjoerg bool OpenFile = false, 22106f32e7eSjoerg bool CacheFailure = true); 22206f32e7eSjoerg 223*13fbcb42Sjoerg /// Get the FileEntryRef for stdin, returning an error if stdin cannot be 224*13fbcb42Sjoerg /// read. 225*13fbcb42Sjoerg /// 226*13fbcb42Sjoerg /// This reads and caches stdin before returning. Subsequent calls return the 227*13fbcb42Sjoerg /// same file entry, and a reference to the cached input is returned by calls 228*13fbcb42Sjoerg /// to getBufferForFile. 229*13fbcb42Sjoerg llvm::Expected<FileEntryRef> getSTDIN(); 230*13fbcb42Sjoerg 23106f32e7eSjoerg /// Get a FileEntryRef if it exists, without doing anything on error. 23206f32e7eSjoerg llvm::Optional<FileEntryRef> getOptionalFileRef(StringRef Filename, 23306f32e7eSjoerg bool OpenFile = false, 23406f32e7eSjoerg bool CacheFailure = true) { 23506f32e7eSjoerg return llvm::expectedToOptional( 23606f32e7eSjoerg getFileRef(Filename, OpenFile, CacheFailure)); 23706f32e7eSjoerg } 23806f32e7eSjoerg 23906f32e7eSjoerg /// Returns the current file system options getFileSystemOpts()24006f32e7eSjoerg FileSystemOptions &getFileSystemOpts() { return FileSystemOpts; } getFileSystemOpts()24106f32e7eSjoerg const FileSystemOptions &getFileSystemOpts() const { return FileSystemOpts; } 24206f32e7eSjoerg getVirtualFileSystem()24306f32e7eSjoerg llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; } 24406f32e7eSjoerg setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)24506f32e7eSjoerg void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) { 24606f32e7eSjoerg this->FS = std::move(FS); 24706f32e7eSjoerg } 24806f32e7eSjoerg 24906f32e7eSjoerg /// Retrieve a file entry for a "virtual" file that acts as 25006f32e7eSjoerg /// if there were a file with the given name on disk. 25106f32e7eSjoerg /// 25206f32e7eSjoerg /// The file itself is not accessed. 253*13fbcb42Sjoerg FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size, 254*13fbcb42Sjoerg time_t ModificationTime); 255*13fbcb42Sjoerg 25606f32e7eSjoerg const FileEntry *getVirtualFile(StringRef Filename, off_t Size, 25706f32e7eSjoerg time_t ModificationTime); 25806f32e7eSjoerg 25906f32e7eSjoerg /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual 26006f32e7eSjoerg /// file entry, to access the real file. The returned FileEntry will have 26106f32e7eSjoerg /// the same filename as FE but a different identity and its own stat. 26206f32e7eSjoerg /// 26306f32e7eSjoerg /// This should be used only for rare error recovery paths because it 26406f32e7eSjoerg /// bypasses all mapping and uniquing, blindly creating a new FileEntry. 26506f32e7eSjoerg /// There is no attempt to deduplicate these; if you bypass the same file 26606f32e7eSjoerg /// twice, you get two new file entries. 26706f32e7eSjoerg llvm::Optional<FileEntryRef> getBypassFile(FileEntryRef VFE); 26806f32e7eSjoerg 26906f32e7eSjoerg /// Open the specified file as a MemoryBuffer, returning a new 27006f32e7eSjoerg /// MemoryBuffer if successful, otherwise returning null. 27106f32e7eSjoerg llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 272*13fbcb42Sjoerg getBufferForFile(const FileEntry *Entry, bool isVolatile = false, 273*13fbcb42Sjoerg bool RequiresNullTerminator = true); 27406f32e7eSjoerg llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 275*13fbcb42Sjoerg getBufferForFile(StringRef Filename, bool isVolatile = false, 276*13fbcb42Sjoerg bool RequiresNullTerminator = true) { 277*13fbcb42Sjoerg return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile, 278*13fbcb42Sjoerg RequiresNullTerminator); 27906f32e7eSjoerg } 28006f32e7eSjoerg 28106f32e7eSjoerg private: 28206f32e7eSjoerg llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 283*13fbcb42Sjoerg getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile, 284*13fbcb42Sjoerg bool RequiresNullTerminator); 28506f32e7eSjoerg 28606f32e7eSjoerg public: 28706f32e7eSjoerg /// Get the 'stat' information for the given \p Path. 28806f32e7eSjoerg /// 28906f32e7eSjoerg /// If the path is relative, it will be resolved against the WorkingDir of the 29006f32e7eSjoerg /// FileManager's FileSystemOptions. 29106f32e7eSjoerg /// 29206f32e7eSjoerg /// \returns a \c std::error_code describing an error, if there was one 29306f32e7eSjoerg std::error_code getNoncachedStatValue(StringRef Path, 29406f32e7eSjoerg llvm::vfs::Status &Result); 29506f32e7eSjoerg 29606f32e7eSjoerg /// If path is not absolute and FileSystemOptions set the working 29706f32e7eSjoerg /// directory, the path is modified to be relative to the given 29806f32e7eSjoerg /// working directory. 29906f32e7eSjoerg /// \returns true if \c path changed. 30006f32e7eSjoerg bool FixupRelativePath(SmallVectorImpl<char> &path) const; 30106f32e7eSjoerg 30206f32e7eSjoerg /// Makes \c Path absolute taking into account FileSystemOptions and the 30306f32e7eSjoerg /// working directory option. 30406f32e7eSjoerg /// \returns true if \c Path changed to absolute. 30506f32e7eSjoerg bool makeAbsolutePath(SmallVectorImpl<char> &Path) const; 30606f32e7eSjoerg 30706f32e7eSjoerg /// Produce an array mapping from the unique IDs assigned to each 30806f32e7eSjoerg /// file to the corresponding FileEntry pointer. 30906f32e7eSjoerg void GetUniqueIDMapping( 31006f32e7eSjoerg SmallVectorImpl<const FileEntry *> &UIDToFiles) const; 31106f32e7eSjoerg 31206f32e7eSjoerg /// Retrieve the canonical name for a given directory. 31306f32e7eSjoerg /// 31406f32e7eSjoerg /// This is a very expensive operation, despite its results being cached, 31506f32e7eSjoerg /// and should only be used when the physical layout of the file system is 31606f32e7eSjoerg /// required, which is (almost) never. 31706f32e7eSjoerg StringRef getCanonicalName(const DirectoryEntry *Dir); 31806f32e7eSjoerg 319*13fbcb42Sjoerg /// Retrieve the canonical name for a given file. 320*13fbcb42Sjoerg /// 321*13fbcb42Sjoerg /// This is a very expensive operation, despite its results being cached, 322*13fbcb42Sjoerg /// and should only be used when the physical layout of the file system is 323*13fbcb42Sjoerg /// required, which is (almost) never. 324*13fbcb42Sjoerg StringRef getCanonicalName(const FileEntry *File); 325*13fbcb42Sjoerg 32606f32e7eSjoerg void PrintStats() const; 32706f32e7eSjoerg }; 32806f32e7eSjoerg 32906f32e7eSjoerg } // end namespace clang 33006f32e7eSjoerg 33106f32e7eSjoerg #endif // LLVM_CLANG_BASIC_FILEMANAGER_H 332