1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the virtual file system interface vfs::FileSystem. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H 15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H 16 17 #include "llvm/ADT/IntrusiveRefCntPtr.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/STLFunctionalExtras.h" 21 #include "llvm/Support/Chrono.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/Errc.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/SourceMgr.h" 27 #include <cassert> 28 #include <cstdint> 29 #include <ctime> 30 #include <memory> 31 #include <optional> 32 #include <stack> 33 #include <string> 34 #include <system_error> 35 #include <utility> 36 #include <vector> 37 38 namespace llvm { 39 40 class MemoryBuffer; 41 class MemoryBufferRef; 42 class Twine; 43 44 namespace vfs { 45 46 /// The result of a \p status operation. 47 class Status { 48 std::string Name; 49 llvm::sys::fs::UniqueID UID; 50 llvm::sys::TimePoint<> MTime; 51 uint32_t User; 52 uint32_t Group; 53 uint64_t Size; 54 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error; 55 llvm::sys::fs::perms Perms; 56 57 public: 58 // FIXME: remove when files support multiple names 59 bool IsVFSMapped = false; 60 61 /// Whether this entity has an external path different from the virtual path, 62 /// and the external path is exposed by leaking it through the abstraction. 63 /// For example, a RedirectingFileSystem will set this for paths where 64 /// UseExternalName is true. 65 /// 66 /// FIXME: Currently the external path is exposed by replacing the virtual 67 /// path in this Status object. Instead, we should leave the path in the 68 /// Status intact (matching the requested virtual path) - see 69 /// FileManager::getFileRef for how we plan to fix this. 70 bool ExposesExternalVFSPath = false; 71 72 Status() = default; 73 Status(const llvm::sys::fs::file_status &Status); 74 Status(const Twine &Name, llvm::sys::fs::UniqueID UID, 75 llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group, 76 uint64_t Size, llvm::sys::fs::file_type Type, 77 llvm::sys::fs::perms Perms); 78 79 /// Get a copy of a Status with a different size. 80 static Status copyWithNewSize(const Status &In, uint64_t NewSize); 81 /// Get a copy of a Status with a different name. 82 static Status copyWithNewName(const Status &In, const Twine &NewName); 83 static Status copyWithNewName(const llvm::sys::fs::file_status &In, 84 const Twine &NewName); 85 86 /// Returns the name that should be used for this file or directory. getName()87 StringRef getName() const { return Name; } 88 89 /// @name Status interface from llvm::sys::fs 90 /// @{ getType()91 llvm::sys::fs::file_type getType() const { return Type; } getPermissions()92 llvm::sys::fs::perms getPermissions() const { return Perms; } getLastModificationTime()93 llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; } getUniqueID()94 llvm::sys::fs::UniqueID getUniqueID() const { return UID; } getUser()95 uint32_t getUser() const { return User; } getGroup()96 uint32_t getGroup() const { return Group; } getSize()97 uint64_t getSize() const { return Size; } 98 /// @} 99 /// @name Status queries 100 /// These are static queries in llvm::sys::fs. 101 /// @{ 102 bool equivalent(const Status &Other) const; 103 bool isDirectory() const; 104 bool isRegularFile() const; 105 bool isOther() const; 106 bool isSymlink() const; 107 bool isStatusKnown() const; 108 bool exists() const; 109 /// @} 110 }; 111 112 /// Represents an open file. 113 class File { 114 public: 115 /// Destroy the file after closing it (if open). 116 /// Sub-classes should generally call close() inside their destructors. We 117 /// cannot do that from the base class, since close is virtual. 118 virtual ~File(); 119 120 /// Get the status of the file. 121 virtual llvm::ErrorOr<Status> status() = 0; 122 123 /// Get the name of the file getName()124 virtual llvm::ErrorOr<std::string> getName() { 125 if (auto Status = status()) 126 return Status->getName().str(); 127 else 128 return Status.getError(); 129 } 130 131 /// Get the contents of the file as a \p MemoryBuffer. 132 virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 133 getBuffer(const Twine &Name, int64_t FileSize = -1, 134 bool RequiresNullTerminator = true, bool IsVolatile = false) = 0; 135 136 /// Closes the file. 137 virtual std::error_code close() = 0; 138 139 // Get the same file with a different path. 140 static ErrorOr<std::unique_ptr<File>> 141 getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P); 142 143 protected: 144 // Set the file's underlying path. setPath(const Twine & Path)145 virtual void setPath(const Twine &Path) {} 146 }; 147 148 /// A member of a directory, yielded by a directory_iterator. 149 /// Only information available on most platforms is included. 150 class directory_entry { 151 std::string Path; 152 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown; 153 154 public: 155 directory_entry() = default; directory_entry(std::string Path,llvm::sys::fs::file_type Type)156 directory_entry(std::string Path, llvm::sys::fs::file_type Type) 157 : Path(std::move(Path)), Type(Type) {} 158 path()159 llvm::StringRef path() const { return Path; } type()160 llvm::sys::fs::file_type type() const { return Type; } 161 }; 162 163 namespace detail { 164 165 /// An interface for virtual file systems to provide an iterator over the 166 /// (non-recursive) contents of a directory. 167 struct DirIterImpl { 168 virtual ~DirIterImpl(); 169 170 /// Sets \c CurrentEntry to the next entry in the directory on success, 171 /// to directory_entry() at end, or returns a system-defined \c error_code. 172 virtual std::error_code increment() = 0; 173 174 directory_entry CurrentEntry; 175 }; 176 177 } // namespace detail 178 179 /// An input iterator over the entries in a virtual path, similar to 180 /// llvm::sys::fs::directory_iterator. 181 class directory_iterator { 182 std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy 183 184 public: directory_iterator(std::shared_ptr<detail::DirIterImpl> I)185 directory_iterator(std::shared_ptr<detail::DirIterImpl> I) 186 : Impl(std::move(I)) { 187 assert(Impl.get() != nullptr && "requires non-null implementation"); 188 if (Impl->CurrentEntry.path().empty()) 189 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 190 } 191 192 /// Construct an 'end' iterator. 193 directory_iterator() = default; 194 195 /// Equivalent to operator++, with an error code. increment(std::error_code & EC)196 directory_iterator &increment(std::error_code &EC) { 197 assert(Impl && "attempting to increment past end"); 198 EC = Impl->increment(); 199 if (Impl->CurrentEntry.path().empty()) 200 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 201 return *this; 202 } 203 204 const directory_entry &operator*() const { return Impl->CurrentEntry; } 205 const directory_entry *operator->() const { return &Impl->CurrentEntry; } 206 207 bool operator==(const directory_iterator &RHS) const { 208 if (Impl && RHS.Impl) 209 return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path(); 210 return !Impl && !RHS.Impl; 211 } 212 bool operator!=(const directory_iterator &RHS) const { 213 return !(*this == RHS); 214 } 215 }; 216 217 class FileSystem; 218 219 namespace detail { 220 221 /// Keeps state for the recursive_directory_iterator. 222 struct RecDirIterState { 223 std::stack<directory_iterator, std::vector<directory_iterator>> Stack; 224 bool HasNoPushRequest = false; 225 }; 226 227 } // end namespace detail 228 229 /// An input iterator over the recursive contents of a virtual path, 230 /// similar to llvm::sys::fs::recursive_directory_iterator. 231 class recursive_directory_iterator { 232 FileSystem *FS; 233 std::shared_ptr<detail::RecDirIterState> 234 State; // Input iterator semantics on copy. 235 236 public: 237 recursive_directory_iterator(FileSystem &FS, const Twine &Path, 238 std::error_code &EC); 239 240 /// Construct an 'end' iterator. 241 recursive_directory_iterator() = default; 242 243 /// Equivalent to operator++, with an error code. 244 recursive_directory_iterator &increment(std::error_code &EC); 245 246 const directory_entry &operator*() const { return *State->Stack.top(); } 247 const directory_entry *operator->() const { return &*State->Stack.top(); } 248 249 bool operator==(const recursive_directory_iterator &Other) const { 250 return State == Other.State; // identity 251 } 252 bool operator!=(const recursive_directory_iterator &RHS) const { 253 return !(*this == RHS); 254 } 255 256 /// Gets the current level. Starting path is at level 0. level()257 int level() const { 258 assert(!State->Stack.empty() && 259 "Cannot get level without any iteration state"); 260 return State->Stack.size() - 1; 261 } 262 no_push()263 void no_push() { State->HasNoPushRequest = true; } 264 }; 265 266 /// The virtual file system interface. 267 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> { 268 public: 269 virtual ~FileSystem(); 270 271 /// Get the status of the entry at \p Path, if one exists. 272 virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0; 273 274 /// Get a \p File object for the file at \p Path, if one exists. 275 virtual llvm::ErrorOr<std::unique_ptr<File>> 276 openFileForRead(const Twine &Path) = 0; 277 278 /// This is a convenience method that opens a file, gets its content and then 279 /// closes the file. 280 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 281 getBufferForFile(const Twine &Name, int64_t FileSize = -1, 282 bool RequiresNullTerminator = true, bool IsVolatile = false); 283 284 /// Get a directory_iterator for \p Dir. 285 /// \note The 'end' iterator is directory_iterator(). 286 virtual directory_iterator dir_begin(const Twine &Dir, 287 std::error_code &EC) = 0; 288 289 /// Set the working directory. This will affect all following operations on 290 /// this file system and may propagate down for nested file systems. 291 virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0; 292 293 /// Get the working directory of this file system. 294 virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0; 295 296 /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve 297 /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`. 298 /// This returns errc::operation_not_permitted if not implemented by subclass. 299 virtual std::error_code getRealPath(const Twine &Path, 300 SmallVectorImpl<char> &Output) const; 301 302 /// Check whether a file exists. Provided for convenience. 303 bool exists(const Twine &Path); 304 305 /// Is the file mounted on a local filesystem? 306 virtual std::error_code isLocal(const Twine &Path, bool &Result); 307 308 /// Make \a Path an absolute path. 309 /// 310 /// Makes \a Path absolute using the current directory if it is not already. 311 /// An empty \a Path will result in the current directory. 312 /// 313 /// /absolute/path => /absolute/path 314 /// relative/../path => <current-directory>/relative/../path 315 /// 316 /// \param Path A path that is modified to be an absolute path. 317 /// \returns success if \a path has been made absolute, otherwise a 318 /// platform-specific error_code. 319 virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const; 320 321 enum class PrintType { Summary, Contents, RecursiveContents }; 322 void print(raw_ostream &OS, PrintType Type = PrintType::Contents, 323 unsigned IndentLevel = 0) const { 324 printImpl(OS, Type, IndentLevel); 325 } 326 327 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 328 LLVM_DUMP_METHOD void dump() const; 329 #endif 330 331 protected: printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)332 virtual void printImpl(raw_ostream &OS, PrintType Type, 333 unsigned IndentLevel) const { 334 printIndent(OS, IndentLevel); 335 OS << "FileSystem\n"; 336 } 337 printIndent(raw_ostream & OS,unsigned IndentLevel)338 void printIndent(raw_ostream &OS, unsigned IndentLevel) const { 339 for (unsigned i = 0; i < IndentLevel; ++i) 340 OS << " "; 341 } 342 }; 343 344 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by 345 /// the operating system. 346 /// The working directory is linked to the process's working directory. 347 /// (This is usually thread-hostile). 348 IntrusiveRefCntPtr<FileSystem> getRealFileSystem(); 349 350 /// Create an \p vfs::FileSystem for the 'real' file system, as seen by 351 /// the operating system. 352 /// It has its own working directory, independent of (but initially equal to) 353 /// that of the process. 354 std::unique_ptr<FileSystem> createPhysicalFileSystem(); 355 356 /// A file system that allows overlaying one \p AbstractFileSystem on top 357 /// of another. 358 /// 359 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being 360 /// one merged file system. When there is a directory that exists in more than 361 /// one file system, the \p OverlayFileSystem contains a directory containing 362 /// the union of their contents. The attributes (permissions, etc.) of the 363 /// top-most (most recently added) directory are used. When there is a file 364 /// that exists in more than one file system, the file in the top-most file 365 /// system overrides the other(s). 366 class OverlayFileSystem : public FileSystem { 367 using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>; 368 369 /// The stack of file systems, implemented as a list in order of 370 /// their addition. 371 FileSystemList FSList; 372 373 public: 374 OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base); 375 376 /// Pushes a file system on top of the stack. 377 void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS); 378 379 llvm::ErrorOr<Status> status(const Twine &Path) override; 380 llvm::ErrorOr<std::unique_ptr<File>> 381 openFileForRead(const Twine &Path) override; 382 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 383 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 384 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 385 std::error_code isLocal(const Twine &Path, bool &Result) override; 386 std::error_code getRealPath(const Twine &Path, 387 SmallVectorImpl<char> &Output) const override; 388 389 using iterator = FileSystemList::reverse_iterator; 390 using const_iterator = FileSystemList::const_reverse_iterator; 391 using reverse_iterator = FileSystemList::iterator; 392 using const_reverse_iterator = FileSystemList::const_iterator; 393 using range = iterator_range<iterator>; 394 using const_range = iterator_range<const_iterator>; 395 396 /// Get an iterator pointing to the most recently added file system. overlays_begin()397 iterator overlays_begin() { return FSList.rbegin(); } overlays_begin()398 const_iterator overlays_begin() const { return FSList.rbegin(); } 399 400 /// Get an iterator pointing one-past the least recently added file system. overlays_end()401 iterator overlays_end() { return FSList.rend(); } overlays_end()402 const_iterator overlays_end() const { return FSList.rend(); } 403 404 /// Get an iterator pointing to the least recently added file system. overlays_rbegin()405 reverse_iterator overlays_rbegin() { return FSList.begin(); } overlays_rbegin()406 const_reverse_iterator overlays_rbegin() const { return FSList.begin(); } 407 408 /// Get an iterator pointing one-past the most recently added file system. overlays_rend()409 reverse_iterator overlays_rend() { return FSList.end(); } overlays_rend()410 const_reverse_iterator overlays_rend() const { return FSList.end(); } 411 overlays_range()412 range overlays_range() { return llvm::reverse(FSList); } overlays_range()413 const_range overlays_range() const { return llvm::reverse(FSList); } 414 415 protected: 416 void printImpl(raw_ostream &OS, PrintType Type, 417 unsigned IndentLevel) const override; 418 }; 419 420 /// By default, this delegates all calls to the underlying file system. This 421 /// is useful when derived file systems want to override some calls and still 422 /// proxy other calls. 423 class ProxyFileSystem : public FileSystem { 424 public: ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)425 explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS) 426 : FS(std::move(FS)) {} 427 status(const Twine & Path)428 llvm::ErrorOr<Status> status(const Twine &Path) override { 429 return FS->status(Path); 430 } 431 llvm::ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine & Path)432 openFileForRead(const Twine &Path) override { 433 return FS->openFileForRead(Path); 434 } dir_begin(const Twine & Dir,std::error_code & EC)435 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override { 436 return FS->dir_begin(Dir, EC); 437 } getCurrentWorkingDirectory()438 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 439 return FS->getCurrentWorkingDirectory(); 440 } setCurrentWorkingDirectory(const Twine & Path)441 std::error_code setCurrentWorkingDirectory(const Twine &Path) override { 442 return FS->setCurrentWorkingDirectory(Path); 443 } getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)444 std::error_code getRealPath(const Twine &Path, 445 SmallVectorImpl<char> &Output) const override { 446 return FS->getRealPath(Path, Output); 447 } isLocal(const Twine & Path,bool & Result)448 std::error_code isLocal(const Twine &Path, bool &Result) override { 449 return FS->isLocal(Path, Result); 450 } 451 452 protected: getUnderlyingFS()453 FileSystem &getUnderlyingFS() const { return *FS; } 454 455 private: 456 IntrusiveRefCntPtr<FileSystem> FS; 457 458 virtual void anchor(); 459 }; 460 461 namespace detail { 462 463 class InMemoryDirectory; 464 class InMemoryNode; 465 466 struct NewInMemoryNodeInfo { 467 llvm::sys::fs::UniqueID DirUID; 468 StringRef Path; 469 StringRef Name; 470 time_t ModificationTime; 471 std::unique_ptr<llvm::MemoryBuffer> Buffer; 472 uint32_t User; 473 uint32_t Group; 474 llvm::sys::fs::file_type Type; 475 llvm::sys::fs::perms Perms; 476 477 Status makeStatus() const; 478 }; 479 480 class NamedNodeOrError { 481 ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>> 482 Value; 483 484 public: NamedNodeOrError(llvm::SmallString<128> Name,const detail::InMemoryNode * Node)485 NamedNodeOrError(llvm::SmallString<128> Name, 486 const detail::InMemoryNode *Node) 487 : Value(std::make_pair(Name, Node)) {} NamedNodeOrError(std::error_code EC)488 NamedNodeOrError(std::error_code EC) : Value(EC) {} NamedNodeOrError(llvm::errc EC)489 NamedNodeOrError(llvm::errc EC) : Value(EC) {} 490 getName()491 StringRef getName() const { return (*Value).first; } 492 explicit operator bool() const { return static_cast<bool>(Value); } error_code()493 operator std::error_code() const { return Value.getError(); } getError()494 std::error_code getError() const { return Value.getError(); } 495 const detail::InMemoryNode *operator*() const { return (*Value).second; } 496 }; 497 498 } // namespace detail 499 500 /// An in-memory file system. 501 class InMemoryFileSystem : public FileSystem { 502 std::unique_ptr<detail::InMemoryDirectory> Root; 503 std::string WorkingDirectory; 504 bool UseNormalizedPaths = true; 505 506 using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>( 507 detail::NewInMemoryNodeInfo)>; 508 509 /// Create node with \p MakeNode and add it into this filesystem at \p Path. 510 bool addFile(const Twine &Path, time_t ModificationTime, 511 std::unique_ptr<llvm::MemoryBuffer> Buffer, 512 std::optional<uint32_t> User, std::optional<uint32_t> Group, 513 std::optional<llvm::sys::fs::file_type> Type, 514 std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode); 515 516 /// Looks up the in-memory node for the path \p P. 517 /// If \p FollowFinalSymlink is true, the returned node is guaranteed to 518 /// not be a symlink and its path may differ from \p P. 519 detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink, 520 size_t SymlinkDepth = 0) const; 521 522 class DirIterator; 523 524 public: 525 explicit InMemoryFileSystem(bool UseNormalizedPaths = true); 526 ~InMemoryFileSystem() override; 527 528 /// Add a file containing a buffer or a directory to the VFS with a 529 /// path. The VFS owns the buffer. If present, User, Group, Type 530 /// and Perms apply to the newly-created file or directory. 531 /// \return true if the file or directory was successfully added, 532 /// false if the file or directory already exists in the file system with 533 /// different contents. 534 bool addFile(const Twine &Path, time_t ModificationTime, 535 std::unique_ptr<llvm::MemoryBuffer> Buffer, 536 std::optional<uint32_t> User = std::nullopt, 537 std::optional<uint32_t> Group = std::nullopt, 538 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 539 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 540 541 /// Add a hard link to a file. 542 /// 543 /// Here hard links are not intended to be fully equivalent to the classical 544 /// filesystem. Both the hard link and the file share the same buffer and 545 /// status (and thus have the same UniqueID). Because of this there is no way 546 /// to distinguish between the link and the file after the link has been 547 /// added. 548 /// 549 /// The \p Target path must be an existing file or a hardlink. The 550 /// \p NewLink file must not have been added before. The \p Target 551 /// path must not be a directory. The \p NewLink node is added as a hard 552 /// link which points to the resolved file of \p Target node. 553 /// \return true if the above condition is satisfied and hardlink was 554 /// successfully created, false otherwise. 555 bool addHardLink(const Twine &NewLink, const Twine &Target); 556 557 /// Arbitrary max depth to search through symlinks. We can get into problems 558 /// if a link links to a link that links back to the link, for example. 559 static constexpr size_t MaxSymlinkDepth = 16; 560 561 /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need 562 /// to refer to a file (or refer to anything, as it happens). Also, an 563 /// in-memory directory for \p Target isn't automatically created. 564 bool 565 addSymbolicLink(const Twine &NewLink, const Twine &Target, 566 time_t ModificationTime, 567 std::optional<uint32_t> User = std::nullopt, 568 std::optional<uint32_t> Group = std::nullopt, 569 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 570 571 /// Add a buffer to the VFS with a path. The VFS does not own the buffer. 572 /// If present, User, Group, Type and Perms apply to the newly-created file 573 /// or directory. 574 /// \return true if the file or directory was successfully added, 575 /// false if the file or directory already exists in the file system with 576 /// different contents. 577 bool addFileNoOwn(const Twine &Path, time_t ModificationTime, 578 const llvm::MemoryBufferRef &Buffer, 579 std::optional<uint32_t> User = std::nullopt, 580 std::optional<uint32_t> Group = std::nullopt, 581 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 582 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 583 584 std::string toString() const; 585 586 /// Return true if this file system normalizes . and .. in paths. useNormalizedPaths()587 bool useNormalizedPaths() const { return UseNormalizedPaths; } 588 589 llvm::ErrorOr<Status> status(const Twine &Path) override; 590 llvm::ErrorOr<std::unique_ptr<File>> 591 openFileForRead(const Twine &Path) override; 592 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 593 getCurrentWorkingDirectory()594 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 595 return WorkingDirectory; 596 } 597 /// Canonicalizes \p Path by combining with the current working 598 /// directory and normalizing the path (e.g. remove dots). If the current 599 /// working directory is not set, this returns errc::operation_not_permitted. 600 /// 601 /// This doesn't resolve symlinks as they are not supported in in-memory file 602 /// system. 603 std::error_code getRealPath(const Twine &Path, 604 SmallVectorImpl<char> &Output) const override; 605 std::error_code isLocal(const Twine &Path, bool &Result) override; 606 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 607 608 protected: 609 void printImpl(raw_ostream &OS, PrintType Type, 610 unsigned IndentLevel) const override; 611 }; 612 613 /// Get a globally unique ID for a virtual file or directory. 614 llvm::sys::fs::UniqueID getNextVirtualUniqueID(); 615 616 /// Gets a \p FileSystem for a virtual file system described in YAML 617 /// format. 618 std::unique_ptr<FileSystem> 619 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer, 620 llvm::SourceMgr::DiagHandlerTy DiagHandler, 621 StringRef YAMLFilePath, void *DiagContext = nullptr, 622 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 623 624 struct YAMLVFSEntry { 625 template <typename T1, typename T2> 626 YAMLVFSEntry(T1 &&VPath, T2 &&RPath, bool IsDirectory = false) VPathYAMLVFSEntry627 : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)), 628 IsDirectory(IsDirectory) {} 629 std::string VPath; 630 std::string RPath; 631 bool IsDirectory = false; 632 }; 633 634 class RedirectingFSDirIterImpl; 635 class RedirectingFileSystemParser; 636 637 /// A virtual file system parsed from a YAML file. 638 /// 639 /// Currently, this class allows creating virtual files and directories. Virtual 640 /// files map to existing external files in \c ExternalFS, and virtual 641 /// directories may either map to existing directories in \c ExternalFS or list 642 /// their contents in the form of other virtual directories and/or files. 643 /// 644 /// The basic structure of the parsed file is: 645 /// \verbatim 646 /// { 647 /// 'version': <version number>, 648 /// <optional configuration> 649 /// 'roots': [ 650 /// <directory entries> 651 /// ] 652 /// } 653 /// \endverbatim 654 /// 655 /// The roots may be absolute or relative. If relative they will be made 656 /// absolute against either current working directory or the directory where 657 /// the Overlay YAML file is located, depending on the 'root-relative' 658 /// configuration. 659 /// 660 /// All configuration options are optional. 661 /// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)> 662 /// 'use-external-names': <boolean, default=true> 663 /// 'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'> 664 /// 'overlay-relative': <boolean, default=false> 665 /// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with' 666 /// instead> 667 /// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or 668 /// 'redirect-only', default='fallthrough'> 669 /// 670 /// To clarify, 'root-relative' option will prepend the current working 671 /// directory, or the overlay directory to the 'roots->name' field only if 672 /// 'roots->name' is a relative path. On the other hand, when 'overlay-relative' 673 /// is set to 'true', external paths will always be prepended with the overlay 674 /// directory, even if external paths are not relative paths. The 675 /// 'root-relative' option has no interaction with the 'overlay-relative' 676 /// option. 677 /// 678 /// Virtual directories that list their contents are represented as 679 /// \verbatim 680 /// { 681 /// 'type': 'directory', 682 /// 'name': <string>, 683 /// 'contents': [ <file or directory entries> ] 684 /// } 685 /// \endverbatim 686 /// 687 /// The default attributes for such virtual directories are: 688 /// \verbatim 689 /// MTime = now() when created 690 /// Perms = 0777 691 /// User = Group = 0 692 /// Size = 0 693 /// UniqueID = unspecified unique value 694 /// \endverbatim 695 /// 696 /// When a path prefix matches such a directory, the next component in the path 697 /// is matched against the entries in the 'contents' array. 698 /// 699 /// Re-mapped directories, on the other hand, are represented as 700 /// /// \verbatim 701 /// { 702 /// 'type': 'directory-remap', 703 /// 'name': <string>, 704 /// 'use-external-name': <boolean>, # Optional 705 /// 'external-contents': <path to external directory> 706 /// } 707 /// \endverbatim 708 /// 709 /// and inherit their attributes from the external directory. When a path 710 /// prefix matches such an entry, the unmatched components are appended to the 711 /// 'external-contents' path, and the resulting path is looked up in the 712 /// external file system instead. 713 /// 714 /// Re-mapped files are represented as 715 /// \verbatim 716 /// { 717 /// 'type': 'file', 718 /// 'name': <string>, 719 /// 'use-external-name': <boolean>, # Optional 720 /// 'external-contents': <path to external file> 721 /// } 722 /// \endverbatim 723 /// 724 /// Their attributes and file contents are determined by looking up the file at 725 /// their 'external-contents' path in the external file system. 726 /// 727 /// For 'file', 'directory' and 'directory-remap' entries the 'name' field may 728 /// contain multiple path components (e.g. /path/to/file). However, any 729 /// directory in such a path that contains more than one child must be uniquely 730 /// represented by a 'directory' entry. 731 /// 732 /// When the 'use-external-name' field is set, calls to \a vfs::File::status() 733 /// give the external (remapped) filesystem name instead of the name the file 734 /// was accessed by. This is an intentional leak through the \a 735 /// RedirectingFileSystem abstraction layer. It enables clients to discover 736 /// (and use) the external file location when communicating with users or tools 737 /// that don't use the same VFS overlay. 738 /// 739 /// FIXME: 'use-external-name' causes behaviour that's inconsistent with how 740 /// "real" filesystems behave. Maybe there should be a separate channel for 741 /// this information. 742 class RedirectingFileSystem : public vfs::FileSystem { 743 public: 744 enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; 745 enum NameKind { NK_NotSet, NK_External, NK_Virtual }; 746 747 /// The type of redirection to perform. 748 enum class RedirectKind { 749 /// Lookup the redirected path first (ie. the one specified in 750 /// 'external-contents') and if that fails "fallthrough" to a lookup of the 751 /// originally provided path. 752 Fallthrough, 753 /// Lookup the provided path first and if that fails, "fallback" to a 754 /// lookup of the redirected path. 755 Fallback, 756 /// Only lookup the redirected path, do not lookup the originally provided 757 /// path. 758 RedirectOnly 759 }; 760 761 /// The type of relative path used by Roots. 762 enum class RootRelativeKind { 763 /// The roots are relative to the current working directory. 764 CWD, 765 /// The roots are relative to the directory where the Overlay YAML file 766 // locates. 767 OverlayDir 768 }; 769 770 /// A single file or directory in the VFS. 771 class Entry { 772 EntryKind Kind; 773 std::string Name; 774 775 public: Entry(EntryKind K,StringRef Name)776 Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {} 777 virtual ~Entry() = default; 778 getName()779 StringRef getName() const { return Name; } getKind()780 EntryKind getKind() const { return Kind; } 781 }; 782 783 /// A directory in the vfs with explicitly specified contents. 784 class DirectoryEntry : public Entry { 785 std::vector<std::unique_ptr<Entry>> Contents; 786 Status S; 787 788 public: 789 /// Constructs a directory entry with explicitly specified contents. DirectoryEntry(StringRef Name,std::vector<std::unique_ptr<Entry>> Contents,Status S)790 DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents, 791 Status S) 792 : Entry(EK_Directory, Name), Contents(std::move(Contents)), 793 S(std::move(S)) {} 794 795 /// Constructs an empty directory entry. DirectoryEntry(StringRef Name,Status S)796 DirectoryEntry(StringRef Name, Status S) 797 : Entry(EK_Directory, Name), S(std::move(S)) {} 798 getStatus()799 Status getStatus() { return S; } 800 addContent(std::unique_ptr<Entry> Content)801 void addContent(std::unique_ptr<Entry> Content) { 802 Contents.push_back(std::move(Content)); 803 } 804 getLastContent()805 Entry *getLastContent() const { return Contents.back().get(); } 806 807 using iterator = decltype(Contents)::iterator; 808 contents_begin()809 iterator contents_begin() { return Contents.begin(); } contents_end()810 iterator contents_end() { return Contents.end(); } 811 classof(const Entry * E)812 static bool classof(const Entry *E) { return E->getKind() == EK_Directory; } 813 }; 814 815 /// A file or directory in the vfs that is mapped to a file or directory in 816 /// the external filesystem. 817 class RemapEntry : public Entry { 818 std::string ExternalContentsPath; 819 NameKind UseName; 820 821 protected: RemapEntry(EntryKind K,StringRef Name,StringRef ExternalContentsPath,NameKind UseName)822 RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath, 823 NameKind UseName) 824 : Entry(K, Name), ExternalContentsPath(ExternalContentsPath), 825 UseName(UseName) {} 826 827 public: getExternalContentsPath()828 StringRef getExternalContentsPath() const { return ExternalContentsPath; } 829 830 /// Whether to use the external path as the name for this file or directory. useExternalName(bool GlobalUseExternalName)831 bool useExternalName(bool GlobalUseExternalName) const { 832 return UseName == NK_NotSet ? GlobalUseExternalName 833 : (UseName == NK_External); 834 } 835 getUseName()836 NameKind getUseName() const { return UseName; } 837 classof(const Entry * E)838 static bool classof(const Entry *E) { 839 switch (E->getKind()) { 840 case EK_DirectoryRemap: 841 [[fallthrough]]; 842 case EK_File: 843 return true; 844 case EK_Directory: 845 return false; 846 } 847 llvm_unreachable("invalid entry kind"); 848 } 849 }; 850 851 /// A directory in the vfs that maps to a directory in the external file 852 /// system. 853 class DirectoryRemapEntry : public RemapEntry { 854 public: DirectoryRemapEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)855 DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath, 856 NameKind UseName) 857 : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {} 858 classof(const Entry * E)859 static bool classof(const Entry *E) { 860 return E->getKind() == EK_DirectoryRemap; 861 } 862 }; 863 864 /// A file in the vfs that maps to a file in the external file system. 865 class FileEntry : public RemapEntry { 866 public: FileEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)867 FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName) 868 : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {} 869 classof(const Entry * E)870 static bool classof(const Entry *E) { return E->getKind() == EK_File; } 871 }; 872 873 /// Represents the result of a path lookup into the RedirectingFileSystem. 874 struct LookupResult { 875 /// Chain of parent directory entries for \c E. 876 llvm::SmallVector<Entry *, 32> Parents; 877 878 /// The entry the looked-up path corresponds to. 879 Entry *E; 880 881 private: 882 /// When the found Entry is a DirectoryRemapEntry, stores the path in the 883 /// external file system that the looked-up path in the virtual file system 884 // corresponds to. 885 std::optional<std::string> ExternalRedirect; 886 887 public: 888 LookupResult(Entry *E, sys::path::const_iterator Start, 889 sys::path::const_iterator End); 890 891 /// If the found Entry maps the input path to a path in the external 892 /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns 893 /// that path. getExternalRedirectLookupResult894 std::optional<StringRef> getExternalRedirect() const { 895 if (isa<DirectoryRemapEntry>(E)) 896 return StringRef(*ExternalRedirect); 897 if (auto *FE = dyn_cast<FileEntry>(E)) 898 return FE->getExternalContentsPath(); 899 return std::nullopt; 900 } 901 902 /// Get the (canonical) path of the found entry. This uses the as-written 903 /// path components from the VFS specification. 904 void getPath(llvm::SmallVectorImpl<char> &Path) const; 905 }; 906 907 private: 908 friend class RedirectingFSDirIterImpl; 909 friend class RedirectingFileSystemParser; 910 911 /// Canonicalize path by removing ".", "..", "./", components. This is 912 /// a VFS request, do not bother about symlinks in the path components 913 /// but canonicalize in order to perform the correct entry search. 914 std::error_code makeCanonical(SmallVectorImpl<char> &Path) const; 915 916 /// Get the File status, or error, from the underlying external file system. 917 /// This returns the status with the originally requested name, while looking 918 /// up the entry using the canonical path. 919 ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath, 920 const Twine &OriginalPath) const; 921 922 /// Make \a Path an absolute path. 923 /// 924 /// Makes \a Path absolute using the \a WorkingDir if it is not already. 925 /// 926 /// /absolute/path => /absolute/path 927 /// relative/../path => <WorkingDir>/relative/../path 928 /// 929 /// \param WorkingDir A path that will be used as the base Dir if \a Path 930 /// is not already absolute. 931 /// \param Path A path that is modified to be an absolute path. 932 /// \returns success if \a path has been made absolute, otherwise a 933 /// platform-specific error_code. 934 std::error_code makeAbsolute(StringRef WorkingDir, 935 SmallVectorImpl<char> &Path) const; 936 937 // In a RedirectingFileSystem, keys can be specified in Posix or Windows 938 // style (or even a mixture of both), so this comparison helper allows 939 // slashes (representing a root) to match backslashes (and vice versa). Note 940 // that, other than the root, path components should not contain slashes or 941 // backslashes. pathComponentMatches(llvm::StringRef lhs,llvm::StringRef rhs)942 bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const { 943 if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_insensitive(rhs))) 944 return true; 945 return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/"); 946 } 947 948 /// The root(s) of the virtual file system. 949 std::vector<std::unique_ptr<Entry>> Roots; 950 951 /// The current working directory of the file system. 952 std::string WorkingDirectory; 953 954 /// The file system to use for external references. 955 IntrusiveRefCntPtr<FileSystem> ExternalFS; 956 957 /// This represents the directory path that the YAML file is located. 958 /// This will be prefixed to each 'external-contents' if IsRelativeOverlay 959 /// is set. This will also be prefixed to each 'roots->name' if RootRelative 960 /// is set to RootRelativeKind::OverlayDir and the path is relative. 961 std::string OverlayFileDir; 962 963 /// @name Configuration 964 /// @{ 965 966 /// Whether to perform case-sensitive comparisons. 967 /// 968 /// Currently, case-insensitive matching only works correctly with ASCII. 969 bool CaseSensitive = is_style_posix(sys::path::Style::native); 970 971 /// IsRelativeOverlay marks whether a OverlayFileDir path must 972 /// be prefixed in every 'external-contents' when reading from YAML files. 973 bool IsRelativeOverlay = false; 974 975 /// Whether to use to use the value of 'external-contents' for the 976 /// names of files. This global value is overridable on a per-file basis. 977 bool UseExternalNames = true; 978 979 /// Determines the lookups to perform, as well as their order. See 980 /// \c RedirectKind for details. 981 RedirectKind Redirection = RedirectKind::Fallthrough; 982 983 /// Determine the prefix directory if the roots are relative paths. See 984 /// \c RootRelativeKind for details. 985 RootRelativeKind RootRelative = RootRelativeKind::CWD; 986 /// @} 987 988 RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS); 989 990 /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing 991 /// into the contents of \p From if it is a directory. Returns a LookupResult 992 /// giving the matched entry and, if that entry is a FileEntry or 993 /// DirectoryRemapEntry, the path it redirects to in the external file system. 994 ErrorOr<LookupResult> 995 lookupPathImpl(llvm::sys::path::const_iterator Start, 996 llvm::sys::path::const_iterator End, Entry *From, 997 llvm::SmallVectorImpl<Entry *> &Entries) const; 998 999 /// Get the status for a path with the provided \c LookupResult. 1000 ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath, 1001 const LookupResult &Result); 1002 1003 public: 1004 /// Looks up \p Path in \c Roots and returns a LookupResult giving the 1005 /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry, 1006 /// the path it redirects to in the external file system. 1007 ErrorOr<LookupResult> lookupPath(StringRef Path) const; 1008 1009 /// Parses \p Buffer, which is expected to be in YAML format and 1010 /// returns a virtual file system representing its contents. 1011 static std::unique_ptr<RedirectingFileSystem> 1012 create(std::unique_ptr<MemoryBuffer> Buffer, 1013 SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1014 void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS); 1015 1016 /// Redirect each of the remapped files from first to second. 1017 static std::unique_ptr<RedirectingFileSystem> 1018 create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, 1019 bool UseExternalNames, FileSystem &ExternalFS); 1020 1021 ErrorOr<Status> status(const Twine &Path) override; 1022 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override; 1023 1024 std::error_code getRealPath(const Twine &Path, 1025 SmallVectorImpl<char> &Output) const override; 1026 1027 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 1028 1029 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 1030 1031 std::error_code isLocal(const Twine &Path, bool &Result) override; 1032 1033 std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override; 1034 1035 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 1036 1037 void setOverlayFileDir(StringRef PrefixDir); 1038 1039 StringRef getOverlayFileDir() const; 1040 1041 /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly 1042 /// otherwise. Will removed in the future, use \c setRedirection instead. 1043 void setFallthrough(bool Fallthrough); 1044 1045 void setRedirection(RedirectingFileSystem::RedirectKind Kind); 1046 1047 std::vector<llvm::StringRef> getRoots() const; 1048 1049 void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const; 1050 1051 protected: 1052 void printImpl(raw_ostream &OS, PrintType Type, 1053 unsigned IndentLevel) const override; 1054 }; 1055 1056 /// Collect all pairs of <virtual path, real path> entries from the 1057 /// \p YAMLFilePath. This is used by the module dependency collector to forward 1058 /// the entries into the reproducer output VFS YAML file. 1059 void collectVFSFromYAML( 1060 std::unique_ptr<llvm::MemoryBuffer> Buffer, 1061 llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1062 SmallVectorImpl<YAMLVFSEntry> &CollectedEntries, 1063 void *DiagContext = nullptr, 1064 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 1065 1066 class YAMLVFSWriter { 1067 std::vector<YAMLVFSEntry> Mappings; 1068 std::optional<bool> IsCaseSensitive; 1069 std::optional<bool> IsOverlayRelative; 1070 std::optional<bool> UseExternalNames; 1071 std::string OverlayDir; 1072 1073 void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory); 1074 1075 public: 1076 YAMLVFSWriter() = default; 1077 1078 void addFileMapping(StringRef VirtualPath, StringRef RealPath); 1079 void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath); 1080 setCaseSensitivity(bool CaseSensitive)1081 void setCaseSensitivity(bool CaseSensitive) { 1082 IsCaseSensitive = CaseSensitive; 1083 } 1084 setUseExternalNames(bool UseExtNames)1085 void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; } 1086 setOverlayDir(StringRef OverlayDirectory)1087 void setOverlayDir(StringRef OverlayDirectory) { 1088 IsOverlayRelative = true; 1089 OverlayDir.assign(OverlayDirectory.str()); 1090 } 1091 getMappings()1092 const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; } 1093 1094 void write(llvm::raw_ostream &OS); 1095 }; 1096 1097 } // namespace vfs 1098 } // namespace llvm 1099 1100 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H 1101