1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the virtual file system interface vfs::FileSystem. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H 15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H 16 17 #include "llvm/ADT/IntrusiveRefCntPtr.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/STLFunctionalExtras.h" 21 #include "llvm/Support/Chrono.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/Errc.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/SourceMgr.h" 27 #include <cassert> 28 #include <cstdint> 29 #include <ctime> 30 #include <memory> 31 #include <optional> 32 #include <stack> 33 #include <string> 34 #include <system_error> 35 #include <utility> 36 #include <vector> 37 38 namespace llvm { 39 40 class MemoryBuffer; 41 class MemoryBufferRef; 42 class Twine; 43 44 namespace vfs { 45 46 /// The result of a \p status operation. 47 class Status { 48 std::string Name; 49 llvm::sys::fs::UniqueID UID; 50 llvm::sys::TimePoint<> MTime; 51 uint32_t User; 52 uint32_t Group; 53 uint64_t Size; 54 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error; 55 llvm::sys::fs::perms Perms; 56 57 public: 58 // FIXME: remove when files support multiple names 59 bool IsVFSMapped = false; 60 61 /// Whether this entity has an external path different from the virtual path, 62 /// and the external path is exposed by leaking it through the abstraction. 63 /// For example, a RedirectingFileSystem will set this for paths where 64 /// UseExternalName is true. 65 /// 66 /// FIXME: Currently the external path is exposed by replacing the virtual 67 /// path in this Status object. Instead, we should leave the path in the 68 /// Status intact (matching the requested virtual path) - see 69 /// FileManager::getFileRef for how how we plan to fix this. 70 bool ExposesExternalVFSPath = false; 71 72 Status() = default; 73 Status(const llvm::sys::fs::file_status &Status); 74 Status(const Twine &Name, llvm::sys::fs::UniqueID UID, 75 llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group, 76 uint64_t Size, llvm::sys::fs::file_type Type, 77 llvm::sys::fs::perms Perms); 78 79 /// Get a copy of a Status with a different size. 80 static Status copyWithNewSize(const Status &In, uint64_t NewSize); 81 /// Get a copy of a Status with a different name. 82 static Status copyWithNewName(const Status &In, const Twine &NewName); 83 static Status copyWithNewName(const llvm::sys::fs::file_status &In, 84 const Twine &NewName); 85 86 /// Returns the name that should be used for this file or directory. getName()87 StringRef getName() const { return Name; } 88 89 /// @name Status interface from llvm::sys::fs 90 /// @{ getType()91 llvm::sys::fs::file_type getType() const { return Type; } getPermissions()92 llvm::sys::fs::perms getPermissions() const { return Perms; } getLastModificationTime()93 llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; } getUniqueID()94 llvm::sys::fs::UniqueID getUniqueID() const { return UID; } getUser()95 uint32_t getUser() const { return User; } getGroup()96 uint32_t getGroup() const { return Group; } getSize()97 uint64_t getSize() const { return Size; } 98 /// @} 99 /// @name Status queries 100 /// These are static queries in llvm::sys::fs. 101 /// @{ 102 bool equivalent(const Status &Other) const; 103 bool isDirectory() const; 104 bool isRegularFile() const; 105 bool isOther() const; 106 bool isSymlink() const; 107 bool isStatusKnown() const; 108 bool exists() const; 109 /// @} 110 }; 111 112 /// Represents an open file. 113 class File { 114 public: 115 /// Destroy the file after closing it (if open). 116 /// Sub-classes should generally call close() inside their destructors. We 117 /// cannot do that from the base class, since close is virtual. 118 virtual ~File(); 119 120 /// Get the status of the file. 121 virtual llvm::ErrorOr<Status> status() = 0; 122 123 /// Get the name of the file getName()124 virtual llvm::ErrorOr<std::string> getName() { 125 if (auto Status = status()) 126 return Status->getName().str(); 127 else 128 return Status.getError(); 129 } 130 131 /// Get the contents of the file as a \p MemoryBuffer. 132 virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 133 getBuffer(const Twine &Name, int64_t FileSize = -1, 134 bool RequiresNullTerminator = true, bool IsVolatile = false) = 0; 135 136 /// Closes the file. 137 virtual std::error_code close() = 0; 138 139 // Get the same file with a different path. 140 static ErrorOr<std::unique_ptr<File>> 141 getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P); 142 143 protected: 144 // Set the file's underlying path. setPath(const Twine & Path)145 virtual void setPath(const Twine &Path) {} 146 }; 147 148 /// A member of a directory, yielded by a directory_iterator. 149 /// Only information available on most platforms is included. 150 class directory_entry { 151 std::string Path; 152 llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown; 153 154 public: 155 directory_entry() = default; directory_entry(std::string Path,llvm::sys::fs::file_type Type)156 directory_entry(std::string Path, llvm::sys::fs::file_type Type) 157 : Path(std::move(Path)), Type(Type) {} 158 path()159 llvm::StringRef path() const { return Path; } type()160 llvm::sys::fs::file_type type() const { return Type; } 161 }; 162 163 namespace detail { 164 165 /// An interface for virtual file systems to provide an iterator over the 166 /// (non-recursive) contents of a directory. 167 struct DirIterImpl { 168 virtual ~DirIterImpl(); 169 170 /// Sets \c CurrentEntry to the next entry in the directory on success, 171 /// to directory_entry() at end, or returns a system-defined \c error_code. 172 virtual std::error_code increment() = 0; 173 174 directory_entry CurrentEntry; 175 }; 176 177 } // namespace detail 178 179 /// An input iterator over the entries in a virtual path, similar to 180 /// llvm::sys::fs::directory_iterator. 181 class directory_iterator { 182 std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy 183 184 public: directory_iterator(std::shared_ptr<detail::DirIterImpl> I)185 directory_iterator(std::shared_ptr<detail::DirIterImpl> I) 186 : Impl(std::move(I)) { 187 assert(Impl.get() != nullptr && "requires non-null implementation"); 188 if (Impl->CurrentEntry.path().empty()) 189 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 190 } 191 192 /// Construct an 'end' iterator. 193 directory_iterator() = default; 194 195 /// Equivalent to operator++, with an error code. increment(std::error_code & EC)196 directory_iterator &increment(std::error_code &EC) { 197 assert(Impl && "attempting to increment past end"); 198 EC = Impl->increment(); 199 if (Impl->CurrentEntry.path().empty()) 200 Impl.reset(); // Normalize the end iterator to Impl == nullptr. 201 return *this; 202 } 203 204 const directory_entry &operator*() const { return Impl->CurrentEntry; } 205 const directory_entry *operator->() const { return &Impl->CurrentEntry; } 206 207 bool operator==(const directory_iterator &RHS) const { 208 if (Impl && RHS.Impl) 209 return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path(); 210 return !Impl && !RHS.Impl; 211 } 212 bool operator!=(const directory_iterator &RHS) const { 213 return !(*this == RHS); 214 } 215 }; 216 217 class FileSystem; 218 219 namespace detail { 220 221 /// Keeps state for the recursive_directory_iterator. 222 struct RecDirIterState { 223 std::stack<directory_iterator, std::vector<directory_iterator>> Stack; 224 bool HasNoPushRequest = false; 225 }; 226 227 } // end namespace detail 228 229 /// An input iterator over the recursive contents of a virtual path, 230 /// similar to llvm::sys::fs::recursive_directory_iterator. 231 class recursive_directory_iterator { 232 FileSystem *FS; 233 std::shared_ptr<detail::RecDirIterState> 234 State; // Input iterator semantics on copy. 235 236 public: 237 recursive_directory_iterator(FileSystem &FS, const Twine &Path, 238 std::error_code &EC); 239 240 /// Construct an 'end' iterator. 241 recursive_directory_iterator() = default; 242 243 /// Equivalent to operator++, with an error code. 244 recursive_directory_iterator &increment(std::error_code &EC); 245 246 const directory_entry &operator*() const { return *State->Stack.top(); } 247 const directory_entry *operator->() const { return &*State->Stack.top(); } 248 249 bool operator==(const recursive_directory_iterator &Other) const { 250 return State == Other.State; // identity 251 } 252 bool operator!=(const recursive_directory_iterator &RHS) const { 253 return !(*this == RHS); 254 } 255 256 /// Gets the current level. Starting path is at level 0. level()257 int level() const { 258 assert(!State->Stack.empty() && 259 "Cannot get level without any iteration state"); 260 return State->Stack.size() - 1; 261 } 262 no_push()263 void no_push() { State->HasNoPushRequest = true; } 264 }; 265 266 /// The virtual file system interface. 267 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> { 268 public: 269 virtual ~FileSystem(); 270 271 /// Get the status of the entry at \p Path, if one exists. 272 virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0; 273 274 /// Get a \p File object for the file at \p Path, if one exists. 275 virtual llvm::ErrorOr<std::unique_ptr<File>> 276 openFileForRead(const Twine &Path) = 0; 277 278 /// This is a convenience method that opens a file, gets its content and then 279 /// closes the file. 280 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 281 getBufferForFile(const Twine &Name, int64_t FileSize = -1, 282 bool RequiresNullTerminator = true, bool IsVolatile = false); 283 284 /// Get a directory_iterator for \p Dir. 285 /// \note The 'end' iterator is directory_iterator(). 286 virtual directory_iterator dir_begin(const Twine &Dir, 287 std::error_code &EC) = 0; 288 289 /// Set the working directory. This will affect all following operations on 290 /// this file system and may propagate down for nested file systems. 291 virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0; 292 293 /// Get the working directory of this file system. 294 virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0; 295 296 /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve 297 /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`. 298 /// This returns errc::operation_not_permitted if not implemented by subclass. 299 virtual std::error_code getRealPath(const Twine &Path, 300 SmallVectorImpl<char> &Output) const; 301 302 /// Check whether a file exists. Provided for convenience. 303 bool exists(const Twine &Path); 304 305 /// Is the file mounted on a local filesystem? 306 virtual std::error_code isLocal(const Twine &Path, bool &Result); 307 308 /// Make \a Path an absolute path. 309 /// 310 /// Makes \a Path absolute using the current directory if it is not already. 311 /// An empty \a Path will result in the current directory. 312 /// 313 /// /absolute/path => /absolute/path 314 /// relative/../path => <current-directory>/relative/../path 315 /// 316 /// \param Path A path that is modified to be an absolute path. 317 /// \returns success if \a path has been made absolute, otherwise a 318 /// platform-specific error_code. 319 virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const; 320 321 enum class PrintType { Summary, Contents, RecursiveContents }; 322 void print(raw_ostream &OS, PrintType Type = PrintType::Contents, 323 unsigned IndentLevel = 0) const { 324 printImpl(OS, Type, IndentLevel); 325 } 326 327 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 328 LLVM_DUMP_METHOD void dump() const; 329 #endif 330 331 protected: printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)332 virtual void printImpl(raw_ostream &OS, PrintType Type, 333 unsigned IndentLevel) const { 334 printIndent(OS, IndentLevel); 335 OS << "FileSystem\n"; 336 } 337 printIndent(raw_ostream & OS,unsigned IndentLevel)338 void printIndent(raw_ostream &OS, unsigned IndentLevel) const { 339 for (unsigned i = 0; i < IndentLevel; ++i) 340 OS << " "; 341 } 342 }; 343 344 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by 345 /// the operating system. 346 /// The working directory is linked to the process's working directory. 347 /// (This is usually thread-hostile). 348 IntrusiveRefCntPtr<FileSystem> getRealFileSystem(); 349 350 /// Create an \p vfs::FileSystem for the 'real' file system, as seen by 351 /// the operating system. 352 /// It has its own working directory, independent of (but initially equal to) 353 /// that of the process. 354 std::unique_ptr<FileSystem> createPhysicalFileSystem(); 355 356 /// A file system that allows overlaying one \p AbstractFileSystem on top 357 /// of another. 358 /// 359 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being 360 /// one merged file system. When there is a directory that exists in more than 361 /// one file system, the \p OverlayFileSystem contains a directory containing 362 /// the union of their contents. The attributes (permissions, etc.) of the 363 /// top-most (most recently added) directory are used. When there is a file 364 /// that exists in more than one file system, the file in the top-most file 365 /// system overrides the other(s). 366 class OverlayFileSystem : public FileSystem { 367 using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>; 368 369 /// The stack of file systems, implemented as a list in order of 370 /// their addition. 371 FileSystemList FSList; 372 373 public: 374 OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base); 375 376 /// Pushes a file system on top of the stack. 377 void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS); 378 379 llvm::ErrorOr<Status> status(const Twine &Path) override; 380 llvm::ErrorOr<std::unique_ptr<File>> 381 openFileForRead(const Twine &Path) override; 382 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 383 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 384 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 385 std::error_code isLocal(const Twine &Path, bool &Result) override; 386 std::error_code getRealPath(const Twine &Path, 387 SmallVectorImpl<char> &Output) const override; 388 389 using iterator = FileSystemList::reverse_iterator; 390 using const_iterator = FileSystemList::const_reverse_iterator; 391 using reverse_iterator = FileSystemList::iterator; 392 using const_reverse_iterator = FileSystemList::const_iterator; 393 using range = iterator_range<iterator>; 394 using const_range = iterator_range<const_iterator>; 395 396 /// Get an iterator pointing to the most recently added file system. overlays_begin()397 iterator overlays_begin() { return FSList.rbegin(); } overlays_begin()398 const_iterator overlays_begin() const { return FSList.rbegin(); } 399 400 /// Get an iterator pointing one-past the least recently added file system. overlays_end()401 iterator overlays_end() { return FSList.rend(); } overlays_end()402 const_iterator overlays_end() const { return FSList.rend(); } 403 404 /// Get an iterator pointing to the least recently added file system. overlays_rbegin()405 reverse_iterator overlays_rbegin() { return FSList.begin(); } overlays_rbegin()406 const_reverse_iterator overlays_rbegin() const { return FSList.begin(); } 407 408 /// Get an iterator pointing one-past the most recently added file system. overlays_rend()409 reverse_iterator overlays_rend() { return FSList.end(); } overlays_rend()410 const_reverse_iterator overlays_rend() const { return FSList.end(); } 411 overlays_range()412 range overlays_range() { return llvm::reverse(FSList); } overlays_range()413 const_range overlays_range() const { return llvm::reverse(FSList); } 414 415 protected: 416 void printImpl(raw_ostream &OS, PrintType Type, 417 unsigned IndentLevel) const override; 418 }; 419 420 /// By default, this delegates all calls to the underlying file system. This 421 /// is useful when derived file systems want to override some calls and still 422 /// proxy other calls. 423 class ProxyFileSystem : public FileSystem { 424 public: ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)425 explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS) 426 : FS(std::move(FS)) {} 427 status(const Twine & Path)428 llvm::ErrorOr<Status> status(const Twine &Path) override { 429 return FS->status(Path); 430 } 431 llvm::ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine & Path)432 openFileForRead(const Twine &Path) override { 433 return FS->openFileForRead(Path); 434 } dir_begin(const Twine & Dir,std::error_code & EC)435 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override { 436 return FS->dir_begin(Dir, EC); 437 } getCurrentWorkingDirectory()438 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 439 return FS->getCurrentWorkingDirectory(); 440 } setCurrentWorkingDirectory(const Twine & Path)441 std::error_code setCurrentWorkingDirectory(const Twine &Path) override { 442 return FS->setCurrentWorkingDirectory(Path); 443 } getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)444 std::error_code getRealPath(const Twine &Path, 445 SmallVectorImpl<char> &Output) const override { 446 return FS->getRealPath(Path, Output); 447 } isLocal(const Twine & Path,bool & Result)448 std::error_code isLocal(const Twine &Path, bool &Result) override { 449 return FS->isLocal(Path, Result); 450 } 451 452 protected: getUnderlyingFS()453 FileSystem &getUnderlyingFS() const { return *FS; } 454 455 private: 456 IntrusiveRefCntPtr<FileSystem> FS; 457 458 virtual void anchor(); 459 }; 460 461 namespace detail { 462 463 class InMemoryDirectory; 464 class InMemoryNode; 465 466 struct NewInMemoryNodeInfo { 467 llvm::sys::fs::UniqueID DirUID; 468 StringRef Path; 469 StringRef Name; 470 time_t ModificationTime; 471 std::unique_ptr<llvm::MemoryBuffer> Buffer; 472 uint32_t User; 473 uint32_t Group; 474 llvm::sys::fs::file_type Type; 475 llvm::sys::fs::perms Perms; 476 477 Status makeStatus() const; 478 }; 479 480 class NamedNodeOrError { 481 ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>> 482 Value; 483 484 public: NamedNodeOrError(llvm::SmallString<128> Name,const detail::InMemoryNode * Node)485 NamedNodeOrError(llvm::SmallString<128> Name, 486 const detail::InMemoryNode *Node) 487 : Value(std::make_pair(Name, Node)) {} NamedNodeOrError(std::error_code EC)488 NamedNodeOrError(std::error_code EC) : Value(EC) {} NamedNodeOrError(llvm::errc EC)489 NamedNodeOrError(llvm::errc EC) : Value(EC) {} 490 getName()491 StringRef getName() const { return (*Value).first; } 492 explicit operator bool() const { return static_cast<bool>(Value); } error_code()493 operator std::error_code() const { return Value.getError(); } getError()494 std::error_code getError() const { return Value.getError(); } 495 const detail::InMemoryNode *operator*() const { return (*Value).second; } 496 }; 497 498 } // namespace detail 499 500 /// An in-memory file system. 501 class InMemoryFileSystem : public FileSystem { 502 std::unique_ptr<detail::InMemoryDirectory> Root; 503 std::string WorkingDirectory; 504 bool UseNormalizedPaths = true; 505 506 using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>( 507 detail::NewInMemoryNodeInfo)>; 508 509 /// Create node with \p MakeNode and add it into this filesystem at \p Path. 510 bool addFile(const Twine &Path, time_t ModificationTime, 511 std::unique_ptr<llvm::MemoryBuffer> Buffer, 512 std::optional<uint32_t> User, std::optional<uint32_t> Group, 513 std::optional<llvm::sys::fs::file_type> Type, 514 std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode); 515 516 /// Looks up the in-memory node for the path \p P. 517 /// If \p FollowFinalSymlink is true, the returned node is guaranteed to 518 /// not be a symlink and its path may differ from \p P. 519 detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink, 520 size_t SymlinkDepth = 0) const; 521 522 class DirIterator; 523 524 public: 525 explicit InMemoryFileSystem(bool UseNormalizedPaths = true); 526 ~InMemoryFileSystem() override; 527 528 /// Add a file containing a buffer or a directory to the VFS with a 529 /// path. The VFS owns the buffer. If present, User, Group, Type 530 /// and Perms apply to the newly-created file or directory. 531 /// \return true if the file or directory was successfully added, 532 /// false if the file or directory already exists in the file system with 533 /// different contents. 534 bool addFile(const Twine &Path, time_t ModificationTime, 535 std::unique_ptr<llvm::MemoryBuffer> Buffer, 536 std::optional<uint32_t> User = std::nullopt, 537 std::optional<uint32_t> Group = std::nullopt, 538 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 539 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 540 541 /// Add a hard link to a file. 542 /// 543 /// Here hard links are not intended to be fully equivalent to the classical 544 /// filesystem. Both the hard link and the file share the same buffer and 545 /// status (and thus have the same UniqueID). Because of this there is no way 546 /// to distinguish between the link and the file after the link has been 547 /// added. 548 /// 549 /// The \p Target path must be an existing file or a hardlink. The 550 /// \p NewLink file must not have been added before. The \p Target 551 /// path must not be a directory. The \p NewLink node is added as a hard 552 /// link which points to the resolved file of \p Target node. 553 /// \return true if the above condition is satisfied and hardlink was 554 /// successfully created, false otherwise. 555 bool addHardLink(const Twine &NewLink, const Twine &Target); 556 557 /// Arbitrary max depth to search through symlinks. We can get into problems 558 /// if a link links to a link that links back to the link, for example. 559 static constexpr size_t MaxSymlinkDepth = 16; 560 561 /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need 562 /// to refer to a file (or refer to anything, as it happens). Also, an 563 /// in-memory directory for \p Target isn't automatically created. 564 bool 565 addSymbolicLink(const Twine &NewLink, const Twine &Target, 566 time_t ModificationTime, 567 std::optional<uint32_t> User = std::nullopt, 568 std::optional<uint32_t> Group = std::nullopt, 569 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 570 571 /// Add a buffer to the VFS with a path. The VFS does not own the buffer. 572 /// If present, User, Group, Type and Perms apply to the newly-created file 573 /// or directory. 574 /// \return true if the file or directory was successfully added, 575 /// false if the file or directory already exists in the file system with 576 /// different contents. 577 bool addFileNoOwn(const Twine &Path, time_t ModificationTime, 578 const llvm::MemoryBufferRef &Buffer, 579 std::optional<uint32_t> User = std::nullopt, 580 std::optional<uint32_t> Group = std::nullopt, 581 std::optional<llvm::sys::fs::file_type> Type = std::nullopt, 582 std::optional<llvm::sys::fs::perms> Perms = std::nullopt); 583 584 std::string toString() const; 585 586 /// Return true if this file system normalizes . and .. in paths. useNormalizedPaths()587 bool useNormalizedPaths() const { return UseNormalizedPaths; } 588 589 llvm::ErrorOr<Status> status(const Twine &Path) override; 590 llvm::ErrorOr<std::unique_ptr<File>> 591 openFileForRead(const Twine &Path) override; 592 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 593 getCurrentWorkingDirectory()594 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override { 595 return WorkingDirectory; 596 } 597 /// Canonicalizes \p Path by combining with the current working 598 /// directory and normalizing the path (e.g. remove dots). If the current 599 /// working directory is not set, this returns errc::operation_not_permitted. 600 /// 601 /// This doesn't resolve symlinks as they are not supported in in-memory file 602 /// system. 603 std::error_code getRealPath(const Twine &Path, 604 SmallVectorImpl<char> &Output) const override; 605 std::error_code isLocal(const Twine &Path, bool &Result) override; 606 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 607 608 protected: 609 void printImpl(raw_ostream &OS, PrintType Type, 610 unsigned IndentLevel) const override; 611 }; 612 613 /// Get a globally unique ID for a virtual file or directory. 614 llvm::sys::fs::UniqueID getNextVirtualUniqueID(); 615 616 /// Gets a \p FileSystem for a virtual file system described in YAML 617 /// format. 618 std::unique_ptr<FileSystem> 619 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer, 620 llvm::SourceMgr::DiagHandlerTy DiagHandler, 621 StringRef YAMLFilePath, void *DiagContext = nullptr, 622 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 623 624 struct YAMLVFSEntry { 625 template <typename T1, typename T2> 626 YAMLVFSEntry(T1 &&VPath, T2 &&RPath, bool IsDirectory = false) VPathYAMLVFSEntry627 : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)), 628 IsDirectory(IsDirectory) {} 629 std::string VPath; 630 std::string RPath; 631 bool IsDirectory = false; 632 }; 633 634 class RedirectingFSDirIterImpl; 635 class RedirectingFileSystemParser; 636 637 /// A virtual file system parsed from a YAML file. 638 /// 639 /// Currently, this class allows creating virtual files and directories. Virtual 640 /// files map to existing external files in \c ExternalFS, and virtual 641 /// directories may either map to existing directories in \c ExternalFS or list 642 /// their contents in the form of other virtual directories and/or files. 643 /// 644 /// The basic structure of the parsed file is: 645 /// \verbatim 646 /// { 647 /// 'version': <version number>, 648 /// <optional configuration> 649 /// 'roots': [ 650 /// <directory entries> 651 /// ] 652 /// } 653 /// \endverbatim 654 /// 655 /// The roots may be absolute or relative. If relative they will be made 656 /// absolute against either current working directory or the directory where 657 /// the Overlay YAML file is located, depending on the 'root-relative' 658 /// configuration. 659 /// 660 /// All configuration options are optional. 661 /// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)> 662 /// 'use-external-names': <boolean, default=true> 663 /// 'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'> 664 /// 'overlay-relative': <boolean, default=false> 665 /// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with' 666 /// instead> 667 /// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or 668 /// 'redirect-only', default='fallthrough'> 669 /// 670 /// To clarify, 'root-relative' option will prepend the current working 671 /// directory, or the overlay directory to the 'roots->name' field only if 672 /// 'roots->name' is a relative path. On the other hand, when 'overlay-relative' 673 /// is set to 'true', external paths will always be prepended with the overlay 674 /// directory, even if external paths are not relative paths. The 675 /// 'root-relative' option has no interaction with the 'overlay-relative' 676 /// option. 677 /// 678 /// Virtual directories that list their contents are represented as 679 /// \verbatim 680 /// { 681 /// 'type': 'directory', 682 /// 'name': <string>, 683 /// 'contents': [ <file or directory entries> ] 684 /// } 685 /// \endverbatim 686 /// 687 /// The default attributes for such virtual directories are: 688 /// \verbatim 689 /// MTime = now() when created 690 /// Perms = 0777 691 /// User = Group = 0 692 /// Size = 0 693 /// UniqueID = unspecified unique value 694 /// \endverbatim 695 /// 696 /// When a path prefix matches such a directory, the next component in the path 697 /// is matched against the entries in the 'contents' array. 698 /// 699 /// Re-mapped directories, on the other hand, are represented as 700 /// /// \verbatim 701 /// { 702 /// 'type': 'directory-remap', 703 /// 'name': <string>, 704 /// 'use-external-name': <boolean>, # Optional 705 /// 'external-contents': <path to external directory> 706 /// } 707 /// \endverbatim 708 /// 709 /// and inherit their attributes from the external directory. When a path 710 /// prefix matches such an entry, the unmatched components are appended to the 711 /// 'external-contents' path, and the resulting path is looked up in the 712 /// external file system instead. 713 /// 714 /// Re-mapped files are represented as 715 /// \verbatim 716 /// { 717 /// 'type': 'file', 718 /// 'name': <string>, 719 /// 'use-external-name': <boolean>, # Optional 720 /// 'external-contents': <path to external file> 721 /// } 722 /// \endverbatim 723 /// 724 /// Their attributes and file contents are determined by looking up the file at 725 /// their 'external-contents' path in the external file system. 726 /// 727 /// For 'file', 'directory' and 'directory-remap' entries the 'name' field may 728 /// contain multiple path components (e.g. /path/to/file). However, any 729 /// directory in such a path that contains more than one child must be uniquely 730 /// represented by a 'directory' entry. 731 /// 732 /// When the 'use-external-name' field is set, calls to \a vfs::File::status() 733 /// give the external (remapped) filesystem name instead of the name the file 734 /// was accessed by. This is an intentional leak through the \a 735 /// RedirectingFileSystem abstraction layer. It enables clients to discover 736 /// (and use) the external file location when communicating with users or tools 737 /// that don't use the same VFS overlay. 738 /// 739 /// FIXME: 'use-external-name' causes behaviour that's inconsistent with how 740 /// "real" filesystems behave. Maybe there should be a separate channel for 741 /// this information. 742 class RedirectingFileSystem : public vfs::FileSystem { 743 public: 744 enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; 745 enum NameKind { NK_NotSet, NK_External, NK_Virtual }; 746 747 /// The type of redirection to perform. 748 enum class RedirectKind { 749 /// Lookup the redirected path first (ie. the one specified in 750 /// 'external-contents') and if that fails "fallthrough" to a lookup of the 751 /// originally provided path. 752 Fallthrough, 753 /// Lookup the provided path first and if that fails, "fallback" to a 754 /// lookup of the redirected path. 755 Fallback, 756 /// Only lookup the redirected path, do not lookup the originally provided 757 /// path. 758 RedirectOnly 759 }; 760 761 /// The type of relative path used by Roots. 762 enum class RootRelativeKind { 763 /// The roots are relative to the current working directory. 764 CWD, 765 /// The roots are relative to the directory where the Overlay YAML file 766 // locates. 767 OverlayDir 768 }; 769 770 /// A single file or directory in the VFS. 771 class Entry { 772 EntryKind Kind; 773 std::string Name; 774 775 public: Entry(EntryKind K,StringRef Name)776 Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {} 777 virtual ~Entry() = default; 778 getName()779 StringRef getName() const { return Name; } getKind()780 EntryKind getKind() const { return Kind; } 781 }; 782 783 /// A directory in the vfs with explicitly specified contents. 784 class DirectoryEntry : public Entry { 785 std::vector<std::unique_ptr<Entry>> Contents; 786 Status S; 787 788 public: 789 /// Constructs a directory entry with explicitly specified contents. DirectoryEntry(StringRef Name,std::vector<std::unique_ptr<Entry>> Contents,Status S)790 DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents, 791 Status S) 792 : Entry(EK_Directory, Name), Contents(std::move(Contents)), 793 S(std::move(S)) {} 794 795 /// Constructs an empty directory entry. DirectoryEntry(StringRef Name,Status S)796 DirectoryEntry(StringRef Name, Status S) 797 : Entry(EK_Directory, Name), S(std::move(S)) {} 798 getStatus()799 Status getStatus() { return S; } 800 addContent(std::unique_ptr<Entry> Content)801 void addContent(std::unique_ptr<Entry> Content) { 802 Contents.push_back(std::move(Content)); 803 } 804 getLastContent()805 Entry *getLastContent() const { return Contents.back().get(); } 806 807 using iterator = decltype(Contents)::iterator; 808 contents_begin()809 iterator contents_begin() { return Contents.begin(); } contents_end()810 iterator contents_end() { return Contents.end(); } 811 classof(const Entry * E)812 static bool classof(const Entry *E) { return E->getKind() == EK_Directory; } 813 }; 814 815 /// A file or directory in the vfs that is mapped to a file or directory in 816 /// the external filesystem. 817 class RemapEntry : public Entry { 818 std::string ExternalContentsPath; 819 NameKind UseName; 820 821 protected: RemapEntry(EntryKind K,StringRef Name,StringRef ExternalContentsPath,NameKind UseName)822 RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath, 823 NameKind UseName) 824 : Entry(K, Name), ExternalContentsPath(ExternalContentsPath), 825 UseName(UseName) {} 826 827 public: getExternalContentsPath()828 StringRef getExternalContentsPath() const { return ExternalContentsPath; } 829 830 /// Whether to use the external path as the name for this file or directory. useExternalName(bool GlobalUseExternalName)831 bool useExternalName(bool GlobalUseExternalName) const { 832 return UseName == NK_NotSet ? GlobalUseExternalName 833 : (UseName == NK_External); 834 } 835 getUseName()836 NameKind getUseName() const { return UseName; } 837 classof(const Entry * E)838 static bool classof(const Entry *E) { 839 switch (E->getKind()) { 840 case EK_DirectoryRemap: 841 [[fallthrough]]; 842 case EK_File: 843 return true; 844 case EK_Directory: 845 return false; 846 } 847 llvm_unreachable("invalid entry kind"); 848 } 849 }; 850 851 /// A directory in the vfs that maps to a directory in the external file 852 /// system. 853 class DirectoryRemapEntry : public RemapEntry { 854 public: DirectoryRemapEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)855 DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath, 856 NameKind UseName) 857 : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {} 858 classof(const Entry * E)859 static bool classof(const Entry *E) { 860 return E->getKind() == EK_DirectoryRemap; 861 } 862 }; 863 864 /// A file in the vfs that maps to a file in the external file system. 865 class FileEntry : public RemapEntry { 866 public: FileEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)867 FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName) 868 : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {} 869 classof(const Entry * E)870 static bool classof(const Entry *E) { return E->getKind() == EK_File; } 871 }; 872 873 /// Represents the result of a path lookup into the RedirectingFileSystem. 874 struct LookupResult { 875 /// The entry the looked-up path corresponds to. 876 Entry *E; 877 878 private: 879 /// When the found Entry is a DirectoryRemapEntry, stores the path in the 880 /// external file system that the looked-up path in the virtual file system 881 // corresponds to. 882 std::optional<std::string> ExternalRedirect; 883 884 public: 885 LookupResult(Entry *E, sys::path::const_iterator Start, 886 sys::path::const_iterator End); 887 888 /// If the found Entry maps the the input path to a path in the external 889 /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns 890 /// that path. getExternalRedirectLookupResult891 std::optional<StringRef> getExternalRedirect() const { 892 if (isa<DirectoryRemapEntry>(E)) 893 return StringRef(*ExternalRedirect); 894 if (auto *FE = dyn_cast<FileEntry>(E)) 895 return FE->getExternalContentsPath(); 896 return std::nullopt; 897 } 898 }; 899 900 private: 901 friend class RedirectingFSDirIterImpl; 902 friend class RedirectingFileSystemParser; 903 904 /// Canonicalize path by removing ".", "..", "./", components. This is 905 /// a VFS request, do not bother about symlinks in the path components 906 /// but canonicalize in order to perform the correct entry search. 907 std::error_code makeCanonical(SmallVectorImpl<char> &Path) const; 908 909 /// Get the File status, or error, from the underlying external file system. 910 /// This returns the status with the originally requested name, while looking 911 /// up the entry using the canonical path. 912 ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath, 913 const Twine &OriginalPath) const; 914 915 /// Make \a Path an absolute path. 916 /// 917 /// Makes \a Path absolute using the \a WorkingDir if it is not already. 918 /// 919 /// /absolute/path => /absolute/path 920 /// relative/../path => <WorkingDir>/relative/../path 921 /// 922 /// \param WorkingDir A path that will be used as the base Dir if \a Path 923 /// is not already absolute. 924 /// \param Path A path that is modified to be an absolute path. 925 /// \returns success if \a path has been made absolute, otherwise a 926 /// platform-specific error_code. 927 std::error_code makeAbsolute(StringRef WorkingDir, 928 SmallVectorImpl<char> &Path) const; 929 930 // In a RedirectingFileSystem, keys can be specified in Posix or Windows 931 // style (or even a mixture of both), so this comparison helper allows 932 // slashes (representing a root) to match backslashes (and vice versa). Note 933 // that, other than the root, path components should not contain slashes or 934 // backslashes. pathComponentMatches(llvm::StringRef lhs,llvm::StringRef rhs)935 bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const { 936 if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_insensitive(rhs))) 937 return true; 938 return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/"); 939 } 940 941 /// The root(s) of the virtual file system. 942 std::vector<std::unique_ptr<Entry>> Roots; 943 944 /// The current working directory of the file system. 945 std::string WorkingDirectory; 946 947 /// The file system to use for external references. 948 IntrusiveRefCntPtr<FileSystem> ExternalFS; 949 950 /// This represents the directory path that the YAML file is located. 951 /// This will be prefixed to each 'external-contents' if IsRelativeOverlay 952 /// is set. This will also be prefixed to each 'roots->name' if RootRelative 953 /// is set to RootRelativeKind::OverlayDir and the path is relative. 954 std::string OverlayFileDir; 955 956 /// @name Configuration 957 /// @{ 958 959 /// Whether to perform case-sensitive comparisons. 960 /// 961 /// Currently, case-insensitive matching only works correctly with ASCII. 962 bool CaseSensitive = is_style_posix(sys::path::Style::native); 963 964 /// IsRelativeOverlay marks whether a OverlayFileDir path must 965 /// be prefixed in every 'external-contents' when reading from YAML files. 966 bool IsRelativeOverlay = false; 967 968 /// Whether to use to use the value of 'external-contents' for the 969 /// names of files. This global value is overridable on a per-file basis. 970 bool UseExternalNames = true; 971 972 /// Determines the lookups to perform, as well as their order. See 973 /// \c RedirectKind for details. 974 RedirectKind Redirection = RedirectKind::Fallthrough; 975 976 /// Determine the prefix directory if the roots are relative paths. See 977 /// \c RootRelativeKind for details. 978 RootRelativeKind RootRelative = RootRelativeKind::CWD; 979 /// @} 980 981 RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS); 982 983 /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing 984 /// into the contents of \p From if it is a directory. Returns a LookupResult 985 /// giving the matched entry and, if that entry is a FileEntry or 986 /// DirectoryRemapEntry, the path it redirects to in the external file system. 987 ErrorOr<LookupResult> lookupPathImpl(llvm::sys::path::const_iterator Start, 988 llvm::sys::path::const_iterator End, 989 Entry *From) const; 990 991 /// Get the status for a path with the provided \c LookupResult. 992 ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath, 993 const LookupResult &Result); 994 995 public: 996 /// Looks up \p Path in \c Roots and returns a LookupResult giving the 997 /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry, 998 /// the path it redirects to in the external file system. 999 ErrorOr<LookupResult> lookupPath(StringRef Path) const; 1000 1001 /// Parses \p Buffer, which is expected to be in YAML format and 1002 /// returns a virtual file system representing its contents. 1003 static std::unique_ptr<RedirectingFileSystem> 1004 create(std::unique_ptr<MemoryBuffer> Buffer, 1005 SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1006 void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS); 1007 1008 /// Redirect each of the remapped files from first to second. 1009 static std::unique_ptr<RedirectingFileSystem> 1010 create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, 1011 bool UseExternalNames, FileSystem &ExternalFS); 1012 1013 ErrorOr<Status> status(const Twine &Path) override; 1014 ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override; 1015 1016 std::error_code getRealPath(const Twine &Path, 1017 SmallVectorImpl<char> &Output) const override; 1018 1019 llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override; 1020 1021 std::error_code setCurrentWorkingDirectory(const Twine &Path) override; 1022 1023 std::error_code isLocal(const Twine &Path, bool &Result) override; 1024 1025 std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override; 1026 1027 directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; 1028 1029 void setOverlayFileDir(StringRef PrefixDir); 1030 1031 StringRef getOverlayFileDir() const; 1032 1033 /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly 1034 /// otherwise. Will removed in the future, use \c setRedirection instead. 1035 void setFallthrough(bool Fallthrough); 1036 1037 void setRedirection(RedirectingFileSystem::RedirectKind Kind); 1038 1039 std::vector<llvm::StringRef> getRoots() const; 1040 1041 void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const; 1042 1043 protected: 1044 void printImpl(raw_ostream &OS, PrintType Type, 1045 unsigned IndentLevel) const override; 1046 }; 1047 1048 /// Collect all pairs of <virtual path, real path> entries from the 1049 /// \p YAMLFilePath. This is used by the module dependency collector to forward 1050 /// the entries into the reproducer output VFS YAML file. 1051 void collectVFSFromYAML( 1052 std::unique_ptr<llvm::MemoryBuffer> Buffer, 1053 llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, 1054 SmallVectorImpl<YAMLVFSEntry> &CollectedEntries, 1055 void *DiagContext = nullptr, 1056 IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem()); 1057 1058 class YAMLVFSWriter { 1059 std::vector<YAMLVFSEntry> Mappings; 1060 std::optional<bool> IsCaseSensitive; 1061 std::optional<bool> IsOverlayRelative; 1062 std::optional<bool> UseExternalNames; 1063 std::string OverlayDir; 1064 1065 void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory); 1066 1067 public: 1068 YAMLVFSWriter() = default; 1069 1070 void addFileMapping(StringRef VirtualPath, StringRef RealPath); 1071 void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath); 1072 setCaseSensitivity(bool CaseSensitive)1073 void setCaseSensitivity(bool CaseSensitive) { 1074 IsCaseSensitive = CaseSensitive; 1075 } 1076 setUseExternalNames(bool UseExtNames)1077 void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; } 1078 setOverlayDir(StringRef OverlayDirectory)1079 void setOverlayDir(StringRef OverlayDirectory) { 1080 IsOverlayRelative = true; 1081 OverlayDir.assign(OverlayDirectory.str()); 1082 } 1083 getMappings()1084 const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; } 1085 1086 void write(llvm::raw_ostream &OS); 1087 }; 1088 1089 } // namespace vfs 1090 } // namespace llvm 1091 1092 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H 1093