1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the virtual file system interface vfs::FileSystem.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
16 
17 #include "llvm/ADT/IntrusiveRefCntPtr.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/STLFunctionalExtras.h"
21 #include "llvm/Support/Chrono.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/Errc.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include <cassert>
28 #include <cstdint>
29 #include <ctime>
30 #include <memory>
31 #include <optional>
32 #include <stack>
33 #include <string>
34 #include <system_error>
35 #include <utility>
36 #include <vector>
37 
38 namespace llvm {
39 
40 class MemoryBuffer;
41 class MemoryBufferRef;
42 class Twine;
43 
44 namespace vfs {
45 
46 /// The result of a \p status operation.
47 class Status {
48   std::string Name;
49   llvm::sys::fs::UniqueID UID;
50   llvm::sys::TimePoint<> MTime;
51   uint32_t User;
52   uint32_t Group;
53   uint64_t Size;
54   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
55   llvm::sys::fs::perms Perms;
56 
57 public:
58   // FIXME: remove when files support multiple names
59   bool IsVFSMapped = false;
60 
61   /// Whether this entity has an external path different from the virtual path,
62   /// and the external path is exposed by leaking it through the abstraction.
63   /// For example, a RedirectingFileSystem will set this for paths where
64   /// UseExternalName is true.
65   ///
66   /// FIXME: Currently the external path is exposed by replacing the virtual
67   /// path in this Status object. Instead, we should leave the path in the
68   /// Status intact (matching the requested virtual path) - see
69   /// FileManager::getFileRef for how we plan to fix this.
70   bool ExposesExternalVFSPath = false;
71 
72   Status() = default;
73   Status(const llvm::sys::fs::file_status &Status);
74   Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
75          llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
76          uint64_t Size, llvm::sys::fs::file_type Type,
77          llvm::sys::fs::perms Perms);
78 
79   /// Get a copy of a Status with a different size.
80   static Status copyWithNewSize(const Status &In, uint64_t NewSize);
81   /// Get a copy of a Status with a different name.
82   static Status copyWithNewName(const Status &In, const Twine &NewName);
83   static Status copyWithNewName(const llvm::sys::fs::file_status &In,
84                                 const Twine &NewName);
85 
86   /// Returns the name that should be used for this file or directory.
getName()87   StringRef getName() const { return Name; }
88 
89   /// @name Status interface from llvm::sys::fs
90   /// @{
getType()91   llvm::sys::fs::file_type getType() const { return Type; }
getPermissions()92   llvm::sys::fs::perms getPermissions() const { return Perms; }
getLastModificationTime()93   llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
getUniqueID()94   llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
getUser()95   uint32_t getUser() const { return User; }
getGroup()96   uint32_t getGroup() const { return Group; }
getSize()97   uint64_t getSize() const { return Size; }
98   /// @}
99   /// @name Status queries
100   /// These are static queries in llvm::sys::fs.
101   /// @{
102   bool equivalent(const Status &Other) const;
103   bool isDirectory() const;
104   bool isRegularFile() const;
105   bool isOther() const;
106   bool isSymlink() const;
107   bool isStatusKnown() const;
108   bool exists() const;
109   /// @}
110 };
111 
112 /// Represents an open file.
113 class File {
114 public:
115   /// Destroy the file after closing it (if open).
116   /// Sub-classes should generally call close() inside their destructors.  We
117   /// cannot do that from the base class, since close is virtual.
118   virtual ~File();
119 
120   /// Get the status of the file.
121   virtual llvm::ErrorOr<Status> status() = 0;
122 
123   /// Get the name of the file
getName()124   virtual llvm::ErrorOr<std::string> getName() {
125     if (auto Status = status())
126       return Status->getName().str();
127     else
128       return Status.getError();
129   }
130 
131   /// Get the contents of the file as a \p MemoryBuffer.
132   virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
133   getBuffer(const Twine &Name, int64_t FileSize = -1,
134             bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
135 
136   /// Closes the file.
137   virtual std::error_code close() = 0;
138 
139   // Get the same file with a different path.
140   static ErrorOr<std::unique_ptr<File>>
141   getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
142 
143 protected:
144   // Set the file's underlying path.
setPath(const Twine & Path)145   virtual void setPath(const Twine &Path) {}
146 };
147 
148 /// A member of a directory, yielded by a directory_iterator.
149 /// Only information available on most platforms is included.
150 class directory_entry {
151   std::string Path;
152   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
153 
154 public:
155   directory_entry() = default;
directory_entry(std::string Path,llvm::sys::fs::file_type Type)156   directory_entry(std::string Path, llvm::sys::fs::file_type Type)
157       : Path(std::move(Path)), Type(Type) {}
158 
path()159   llvm::StringRef path() const { return Path; }
type()160   llvm::sys::fs::file_type type() const { return Type; }
161 };
162 
163 namespace detail {
164 
165 /// An interface for virtual file systems to provide an iterator over the
166 /// (non-recursive) contents of a directory.
167 struct DirIterImpl {
168   virtual ~DirIterImpl();
169 
170   /// Sets \c CurrentEntry to the next entry in the directory on success,
171   /// to directory_entry() at end,  or returns a system-defined \c error_code.
172   virtual std::error_code increment() = 0;
173 
174   directory_entry CurrentEntry;
175 };
176 
177 } // namespace detail
178 
179 /// An input iterator over the entries in a virtual path, similar to
180 /// llvm::sys::fs::directory_iterator.
181 class directory_iterator {
182   std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
183 
184 public:
directory_iterator(std::shared_ptr<detail::DirIterImpl> I)185   directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
186       : Impl(std::move(I)) {
187     assert(Impl.get() != nullptr && "requires non-null implementation");
188     if (Impl->CurrentEntry.path().empty())
189       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
190   }
191 
192   /// Construct an 'end' iterator.
193   directory_iterator() = default;
194 
195   /// Equivalent to operator++, with an error code.
increment(std::error_code & EC)196   directory_iterator &increment(std::error_code &EC) {
197     assert(Impl && "attempting to increment past end");
198     EC = Impl->increment();
199     if (Impl->CurrentEntry.path().empty())
200       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
201     return *this;
202   }
203 
204   const directory_entry &operator*() const { return Impl->CurrentEntry; }
205   const directory_entry *operator->() const { return &Impl->CurrentEntry; }
206 
207   bool operator==(const directory_iterator &RHS) const {
208     if (Impl && RHS.Impl)
209       return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
210     return !Impl && !RHS.Impl;
211   }
212   bool operator!=(const directory_iterator &RHS) const {
213     return !(*this == RHS);
214   }
215 };
216 
217 class FileSystem;
218 
219 namespace detail {
220 
221 /// Keeps state for the recursive_directory_iterator.
222 struct RecDirIterState {
223   std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
224   bool HasNoPushRequest = false;
225 };
226 
227 } // end namespace detail
228 
229 /// An input iterator over the recursive contents of a virtual path,
230 /// similar to llvm::sys::fs::recursive_directory_iterator.
231 class recursive_directory_iterator {
232   FileSystem *FS;
233   std::shared_ptr<detail::RecDirIterState>
234       State; // Input iterator semantics on copy.
235 
236 public:
237   recursive_directory_iterator(FileSystem &FS, const Twine &Path,
238                                std::error_code &EC);
239 
240   /// Construct an 'end' iterator.
241   recursive_directory_iterator() = default;
242 
243   /// Equivalent to operator++, with an error code.
244   recursive_directory_iterator &increment(std::error_code &EC);
245 
246   const directory_entry &operator*() const { return *State->Stack.top(); }
247   const directory_entry *operator->() const { return &*State->Stack.top(); }
248 
249   bool operator==(const recursive_directory_iterator &Other) const {
250     return State == Other.State; // identity
251   }
252   bool operator!=(const recursive_directory_iterator &RHS) const {
253     return !(*this == RHS);
254   }
255 
256   /// Gets the current level. Starting path is at level 0.
level()257   int level() const {
258     assert(!State->Stack.empty() &&
259            "Cannot get level without any iteration state");
260     return State->Stack.size() - 1;
261   }
262 
no_push()263   void no_push() { State->HasNoPushRequest = true; }
264 };
265 
266 /// The virtual file system interface.
267 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
268 public:
269   virtual ~FileSystem();
270 
271   /// Get the status of the entry at \p Path, if one exists.
272   virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
273 
274   /// Get a \p File object for the file at \p Path, if one exists.
275   virtual llvm::ErrorOr<std::unique_ptr<File>>
276   openFileForRead(const Twine &Path) = 0;
277 
278   /// This is a convenience method that opens a file, gets its content and then
279   /// closes the file.
280   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
281   getBufferForFile(const Twine &Name, int64_t FileSize = -1,
282                    bool RequiresNullTerminator = true, bool IsVolatile = false);
283 
284   /// Get a directory_iterator for \p Dir.
285   /// \note The 'end' iterator is directory_iterator().
286   virtual directory_iterator dir_begin(const Twine &Dir,
287                                        std::error_code &EC) = 0;
288 
289   /// Set the working directory. This will affect all following operations on
290   /// this file system and may propagate down for nested file systems.
291   virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
292 
293   /// Get the working directory of this file system.
294   virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
295 
296   /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve
297   /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`.
298   /// This returns errc::operation_not_permitted if not implemented by subclass.
299   virtual std::error_code getRealPath(const Twine &Path,
300                                       SmallVectorImpl<char> &Output) const;
301 
302   /// Check whether a file exists. Provided for convenience.
303   bool exists(const Twine &Path);
304 
305   /// Is the file mounted on a local filesystem?
306   virtual std::error_code isLocal(const Twine &Path, bool &Result);
307 
308   /// Make \a Path an absolute path.
309   ///
310   /// Makes \a Path absolute using the current directory if it is not already.
311   /// An empty \a Path will result in the current directory.
312   ///
313   /// /absolute/path   => /absolute/path
314   /// relative/../path => <current-directory>/relative/../path
315   ///
316   /// \param Path A path that is modified to be an absolute path.
317   /// \returns success if \a path has been made absolute, otherwise a
318   ///          platform-specific error_code.
319   virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
320 
321   enum class PrintType { Summary, Contents, RecursiveContents };
322   void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
323              unsigned IndentLevel = 0) const {
324     printImpl(OS, Type, IndentLevel);
325   }
326 
327 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
328   LLVM_DUMP_METHOD void dump() const;
329 #endif
330 
331 protected:
printImpl(raw_ostream & OS,PrintType Type,unsigned IndentLevel)332   virtual void printImpl(raw_ostream &OS, PrintType Type,
333                          unsigned IndentLevel) const {
334     printIndent(OS, IndentLevel);
335     OS << "FileSystem\n";
336   }
337 
printIndent(raw_ostream & OS,unsigned IndentLevel)338   void printIndent(raw_ostream &OS, unsigned IndentLevel) const {
339     for (unsigned i = 0; i < IndentLevel; ++i)
340       OS << "  ";
341   }
342 };
343 
344 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
345 /// the operating system.
346 /// The working directory is linked to the process's working directory.
347 /// (This is usually thread-hostile).
348 IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
349 
350 /// Create an \p vfs::FileSystem for the 'real' file system, as seen by
351 /// the operating system.
352 /// It has its own working directory, independent of (but initially equal to)
353 /// that of the process.
354 std::unique_ptr<FileSystem> createPhysicalFileSystem();
355 
356 /// A file system that allows overlaying one \p AbstractFileSystem on top
357 /// of another.
358 ///
359 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
360 /// one merged file system. When there is a directory that exists in more than
361 /// one file system, the \p OverlayFileSystem contains a directory containing
362 /// the union of their contents.  The attributes (permissions, etc.) of the
363 /// top-most (most recently added) directory are used.  When there is a file
364 /// that exists in more than one file system, the file in the top-most file
365 /// system overrides the other(s).
366 class OverlayFileSystem : public FileSystem {
367   using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
368 
369   /// The stack of file systems, implemented as a list in order of
370   /// their addition.
371   FileSystemList FSList;
372 
373 public:
374   OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
375 
376   /// Pushes a file system on top of the stack.
377   void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
378 
379   llvm::ErrorOr<Status> status(const Twine &Path) override;
380   llvm::ErrorOr<std::unique_ptr<File>>
381   openFileForRead(const Twine &Path) override;
382   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
383   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
384   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
385   std::error_code isLocal(const Twine &Path, bool &Result) override;
386   std::error_code getRealPath(const Twine &Path,
387                               SmallVectorImpl<char> &Output) const override;
388 
389   using iterator = FileSystemList::reverse_iterator;
390   using const_iterator = FileSystemList::const_reverse_iterator;
391   using reverse_iterator = FileSystemList::iterator;
392   using const_reverse_iterator = FileSystemList::const_iterator;
393   using range = iterator_range<iterator>;
394   using const_range = iterator_range<const_iterator>;
395 
396   /// Get an iterator pointing to the most recently added file system.
overlays_begin()397   iterator overlays_begin() { return FSList.rbegin(); }
overlays_begin()398   const_iterator overlays_begin() const { return FSList.rbegin(); }
399 
400   /// Get an iterator pointing one-past the least recently added file system.
overlays_end()401   iterator overlays_end() { return FSList.rend(); }
overlays_end()402   const_iterator overlays_end() const { return FSList.rend(); }
403 
404   /// Get an iterator pointing to the least recently added file system.
overlays_rbegin()405   reverse_iterator overlays_rbegin() { return FSList.begin(); }
overlays_rbegin()406   const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
407 
408   /// Get an iterator pointing one-past the most recently added file system.
overlays_rend()409   reverse_iterator overlays_rend() { return FSList.end(); }
overlays_rend()410   const_reverse_iterator overlays_rend() const { return FSList.end(); }
411 
overlays_range()412   range overlays_range() { return llvm::reverse(FSList); }
overlays_range()413   const_range overlays_range() const { return llvm::reverse(FSList); }
414 
415 protected:
416   void printImpl(raw_ostream &OS, PrintType Type,
417                  unsigned IndentLevel) const override;
418 };
419 
420 /// By default, this delegates all calls to the underlying file system. This
421 /// is useful when derived file systems want to override some calls and still
422 /// proxy other calls.
423 class ProxyFileSystem : public FileSystem {
424 public:
ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)425   explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
426       : FS(std::move(FS)) {}
427 
status(const Twine & Path)428   llvm::ErrorOr<Status> status(const Twine &Path) override {
429     return FS->status(Path);
430   }
431   llvm::ErrorOr<std::unique_ptr<File>>
openFileForRead(const Twine & Path)432   openFileForRead(const Twine &Path) override {
433     return FS->openFileForRead(Path);
434   }
dir_begin(const Twine & Dir,std::error_code & EC)435   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
436     return FS->dir_begin(Dir, EC);
437   }
getCurrentWorkingDirectory()438   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
439     return FS->getCurrentWorkingDirectory();
440   }
setCurrentWorkingDirectory(const Twine & Path)441   std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
442     return FS->setCurrentWorkingDirectory(Path);
443   }
getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)444   std::error_code getRealPath(const Twine &Path,
445                               SmallVectorImpl<char> &Output) const override {
446     return FS->getRealPath(Path, Output);
447   }
isLocal(const Twine & Path,bool & Result)448   std::error_code isLocal(const Twine &Path, bool &Result) override {
449     return FS->isLocal(Path, Result);
450   }
451 
452 protected:
getUnderlyingFS()453   FileSystem &getUnderlyingFS() const { return *FS; }
454 
455 private:
456   IntrusiveRefCntPtr<FileSystem> FS;
457 
458   virtual void anchor();
459 };
460 
461 namespace detail {
462 
463 class InMemoryDirectory;
464 class InMemoryNode;
465 
466 struct NewInMemoryNodeInfo {
467   llvm::sys::fs::UniqueID DirUID;
468   StringRef Path;
469   StringRef Name;
470   time_t ModificationTime;
471   std::unique_ptr<llvm::MemoryBuffer> Buffer;
472   uint32_t User;
473   uint32_t Group;
474   llvm::sys::fs::file_type Type;
475   llvm::sys::fs::perms Perms;
476 
477   Status makeStatus() const;
478 };
479 
480 class NamedNodeOrError {
481   ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
482       Value;
483 
484 public:
NamedNodeOrError(llvm::SmallString<128> Name,const detail::InMemoryNode * Node)485   NamedNodeOrError(llvm::SmallString<128> Name,
486                    const detail::InMemoryNode *Node)
487       : Value(std::make_pair(Name, Node)) {}
NamedNodeOrError(std::error_code EC)488   NamedNodeOrError(std::error_code EC) : Value(EC) {}
NamedNodeOrError(llvm::errc EC)489   NamedNodeOrError(llvm::errc EC) : Value(EC) {}
490 
getName()491   StringRef getName() const { return (*Value).first; }
492   explicit operator bool() const { return static_cast<bool>(Value); }
error_code()493   operator std::error_code() const { return Value.getError(); }
getError()494   std::error_code getError() const { return Value.getError(); }
495   const detail::InMemoryNode *operator*() const { return (*Value).second; }
496 };
497 
498 } // namespace detail
499 
500 /// An in-memory file system.
501 class InMemoryFileSystem : public FileSystem {
502   std::unique_ptr<detail::InMemoryDirectory> Root;
503   std::string WorkingDirectory;
504   bool UseNormalizedPaths = true;
505 
506   using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>(
507       detail::NewInMemoryNodeInfo)>;
508 
509   /// Create node with \p MakeNode and add it into this filesystem at \p Path.
510   bool addFile(const Twine &Path, time_t ModificationTime,
511                std::unique_ptr<llvm::MemoryBuffer> Buffer,
512                std::optional<uint32_t> User, std::optional<uint32_t> Group,
513                std::optional<llvm::sys::fs::file_type> Type,
514                std::optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
515 
516   /// Looks up the in-memory node for the path \p P.
517   /// If \p FollowFinalSymlink is true, the returned node is guaranteed to
518   /// not be a symlink and its path may differ from \p P.
519   detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
520                                       size_t SymlinkDepth = 0) const;
521 
522   class DirIterator;
523 
524 public:
525   explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
526   ~InMemoryFileSystem() override;
527 
528   /// Add a file containing a buffer or a directory to the VFS with a
529   /// path. The VFS owns the buffer.  If present, User, Group, Type
530   /// and Perms apply to the newly-created file or directory.
531   /// \return true if the file or directory was successfully added,
532   /// false if the file or directory already exists in the file system with
533   /// different contents.
534   bool addFile(const Twine &Path, time_t ModificationTime,
535                std::unique_ptr<llvm::MemoryBuffer> Buffer,
536                std::optional<uint32_t> User = std::nullopt,
537                std::optional<uint32_t> Group = std::nullopt,
538                std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
539                std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
540 
541   /// Add a hard link to a file.
542   ///
543   /// Here hard links are not intended to be fully equivalent to the classical
544   /// filesystem. Both the hard link and the file share the same buffer and
545   /// status (and thus have the same UniqueID). Because of this there is no way
546   /// to distinguish between the link and the file after the link has been
547   /// added.
548   ///
549   /// The \p Target path must be an existing file or a hardlink. The
550   /// \p NewLink file must not have been added before. The \p Target
551   /// path must not be a directory. The \p NewLink node is added as a hard
552   /// link which points to the resolved file of \p Target node.
553   /// \return true if the above condition is satisfied and hardlink was
554   /// successfully created, false otherwise.
555   bool addHardLink(const Twine &NewLink, const Twine &Target);
556 
557   /// Arbitrary max depth to search through symlinks. We can get into problems
558   /// if a link links to a link that links back to the link, for example.
559   static constexpr size_t MaxSymlinkDepth = 16;
560 
561   /// Add a symbolic link. Unlike a HardLink, because \p Target doesn't need
562   /// to refer to a file (or refer to anything, as it happens). Also, an
563   /// in-memory directory for \p Target isn't automatically created.
564   bool
565   addSymbolicLink(const Twine &NewLink, const Twine &Target,
566                   time_t ModificationTime,
567                   std::optional<uint32_t> User = std::nullopt,
568                   std::optional<uint32_t> Group = std::nullopt,
569                   std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
570 
571   /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
572   /// If present, User, Group, Type and Perms apply to the newly-created file
573   /// or directory.
574   /// \return true if the file or directory was successfully added,
575   /// false if the file or directory already exists in the file system with
576   /// different contents.
577   bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
578                     const llvm::MemoryBufferRef &Buffer,
579                     std::optional<uint32_t> User = std::nullopt,
580                     std::optional<uint32_t> Group = std::nullopt,
581                     std::optional<llvm::sys::fs::file_type> Type = std::nullopt,
582                     std::optional<llvm::sys::fs::perms> Perms = std::nullopt);
583 
584   std::string toString() const;
585 
586   /// Return true if this file system normalizes . and .. in paths.
useNormalizedPaths()587   bool useNormalizedPaths() const { return UseNormalizedPaths; }
588 
589   llvm::ErrorOr<Status> status(const Twine &Path) override;
590   llvm::ErrorOr<std::unique_ptr<File>>
591   openFileForRead(const Twine &Path) override;
592   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
593 
getCurrentWorkingDirectory()594   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
595     return WorkingDirectory;
596   }
597   /// Canonicalizes \p Path by combining with the current working
598   /// directory and normalizing the path (e.g. remove dots). If the current
599   /// working directory is not set, this returns errc::operation_not_permitted.
600   ///
601   /// This doesn't resolve symlinks as they are not supported in in-memory file
602   /// system.
603   std::error_code getRealPath(const Twine &Path,
604                               SmallVectorImpl<char> &Output) const override;
605   std::error_code isLocal(const Twine &Path, bool &Result) override;
606   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
607 
608 protected:
609   void printImpl(raw_ostream &OS, PrintType Type,
610                  unsigned IndentLevel) const override;
611 };
612 
613 /// Get a globally unique ID for a virtual file or directory.
614 llvm::sys::fs::UniqueID getNextVirtualUniqueID();
615 
616 /// Gets a \p FileSystem for a virtual file system described in YAML
617 /// format.
618 std::unique_ptr<FileSystem>
619 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
620                llvm::SourceMgr::DiagHandlerTy DiagHandler,
621                StringRef YAMLFilePath, void *DiagContext = nullptr,
622                IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
623 
/// A pair of path strings plus a directory flag.
/// NOTE(review): VPath/RPath presumably mean the virtual and real (external)
/// paths of a YAML VFS mapping; confirm against the users of this struct.
struct YAMLVFSEntry {
  std::string VPath;
  std::string RPath;
  bool IsDirectory = false;

  /// Perfect-forwards \p VPath and \p RPath into the two strings, so any
  /// argument a std::string can be constructed from is accepted.
  template <typename T1, typename T2>
  YAMLVFSEntry(T1 &&VPath, T2 &&RPath, bool IsDirectory = false)
      : VPath(std::forward<T1>(VPath)), RPath(std::forward<T2>(RPath)),
        IsDirectory(IsDirectory) {}
};
633 
634 class RedirectingFSDirIterImpl;
635 class RedirectingFileSystemParser;
636 
637 /// A virtual file system parsed from a YAML file.
638 ///
639 /// Currently, this class allows creating virtual files and directories. Virtual
640 /// files map to existing external files in \c ExternalFS, and virtual
641 /// directories may either map to existing directories in \c ExternalFS or list
642 /// their contents in the form of other virtual directories and/or files.
643 ///
644 /// The basic structure of the parsed file is:
645 /// \verbatim
646 /// {
647 ///   'version': <version number>,
648 ///   <optional configuration>
649 ///   'roots': [
650 ///              <directory entries>
651 ///            ]
652 /// }
653 /// \endverbatim
654 ///
655 /// The roots may be absolute or relative. If relative they will be made
656 /// absolute against either current working directory or the directory where
657 /// the Overlay YAML file is located, depending on the 'root-relative'
658 /// configuration.
659 ///
660 /// All configuration options are optional.
661 ///   'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
662 ///   'use-external-names': <boolean, default=true>
663 ///   'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'>
664 ///   'overlay-relative': <boolean, default=false>
665 ///   'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
666 ///                   instead>
667 ///   'redirecting-with': <string, one of 'fallthrough', 'fallback', or
668 ///                        'redirect-only', default='fallthrough'>
669 ///
670 /// To clarify, 'root-relative' option will prepend the current working
671 /// directory, or the overlay directory to the 'roots->name' field only if
672 /// 'roots->name' is a relative path. On the other hand, when 'overlay-relative'
673 /// is set to 'true', external paths will always be prepended with the overlay
674 /// directory, even if external paths are not relative paths. The
675 /// 'root-relative' option has no interaction with the 'overlay-relative'
676 /// option.
677 ///
678 /// Virtual directories that list their contents are represented as
679 /// \verbatim
680 /// {
681 ///   'type': 'directory',
682 ///   'name': <string>,
683 ///   'contents': [ <file or directory entries> ]
684 /// }
685 /// \endverbatim
686 ///
687 /// The default attributes for such virtual directories are:
688 /// \verbatim
689 /// MTime = now() when created
690 /// Perms = 0777
691 /// User = Group = 0
692 /// Size = 0
693 /// UniqueID = unspecified unique value
694 /// \endverbatim
695 ///
696 /// When a path prefix matches such a directory, the next component in the path
697 /// is matched against the entries in the 'contents' array.
698 ///
699 /// Re-mapped directories, on the other hand, are represented as
/// \verbatim
701 /// {
702 ///   'type': 'directory-remap',
703 ///   'name': <string>,
704 ///   'use-external-name': <boolean>, # Optional
705 ///   'external-contents': <path to external directory>
706 /// }
707 /// \endverbatim
708 ///
709 /// and inherit their attributes from the external directory. When a path
710 /// prefix matches such an entry, the unmatched components are appended to the
711 /// 'external-contents' path, and the resulting path is looked up in the
712 /// external file system instead.
713 ///
714 /// Re-mapped files are represented as
715 /// \verbatim
716 /// {
717 ///   'type': 'file',
718 ///   'name': <string>,
719 ///   'use-external-name': <boolean>, # Optional
720 ///   'external-contents': <path to external file>
721 /// }
722 /// \endverbatim
723 ///
724 /// Their attributes and file contents are determined by looking up the file at
725 /// their 'external-contents' path in the external file system.
726 ///
727 /// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
728 /// contain multiple path components (e.g. /path/to/file). However, any
729 /// directory in such a path that contains more than one child must be uniquely
730 /// represented by a 'directory' entry.
731 ///
732 /// When the 'use-external-name' field is set, calls to \a vfs::File::status()
733 /// give the external (remapped) filesystem name instead of the name the file
734 /// was accessed by. This is an intentional leak through the \a
735 /// RedirectingFileSystem abstraction layer. It enables clients to discover
736 /// (and use) the external file location when communicating with users or tools
737 /// that don't use the same VFS overlay.
738 ///
739 /// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
740 /// "real" filesystems behave. Maybe there should be a separate channel for
741 /// this information.
742 class RedirectingFileSystem : public vfs::FileSystem {
743 public:
  /// Tag identifying the concrete \c Entry subclass: \c EK_Directory is used
  /// by \c DirectoryEntry, \c EK_DirectoryRemap by \c DirectoryRemapEntry and
  /// \c EK_File by \c FileEntry.
  enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
  /// Per-entry choice of which name to report; consumed by
  /// \c RemapEntry::useExternalName. \c NK_NotSet defers to the global
  /// setting, \c NK_External selects the external name, and \c NK_Virtual
  /// does not.
  enum NameKind { NK_NotSet, NK_External, NK_Virtual };
746 
  /// The type of redirection to perform.
  enum class RedirectKind {
    /// Lookup the redirected path first (i.e. the one specified in
    /// 'external-contents') and if that fails "fallthrough" to a lookup of the
    /// originally provided path.
    Fallthrough,
    /// Lookup the provided path first and if that fails, "fallback" to a
    /// lookup of the redirected path.
    Fallback,
    /// Only lookup the redirected path, do not lookup the originally provided
    /// path.
    RedirectOnly
  };
760 
  /// The type of relative path used by Roots.
  enum class RootRelativeKind {
    /// The roots are relative to the current working directory.
    CWD,
    /// The roots are relative to the directory in which the overlay YAML
    /// file is located.
    OverlayDir
  };
769 
770   /// A single file or directory in the VFS.
771   class Entry {
772     EntryKind Kind;
773     std::string Name;
774 
775   public:
Entry(EntryKind K,StringRef Name)776     Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
777     virtual ~Entry() = default;
778 
getName()779     StringRef getName() const { return Name; }
getKind()780     EntryKind getKind() const { return Kind; }
781   };
782 
783   /// A directory in the vfs with explicitly specified contents.
784   class DirectoryEntry : public Entry {
785     std::vector<std::unique_ptr<Entry>> Contents;
786     Status S;
787 
788   public:
789     /// Constructs a directory entry with explicitly specified contents.
DirectoryEntry(StringRef Name,std::vector<std::unique_ptr<Entry>> Contents,Status S)790     DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents,
791                    Status S)
792         : Entry(EK_Directory, Name), Contents(std::move(Contents)),
793           S(std::move(S)) {}
794 
795     /// Constructs an empty directory entry.
DirectoryEntry(StringRef Name,Status S)796     DirectoryEntry(StringRef Name, Status S)
797         : Entry(EK_Directory, Name), S(std::move(S)) {}
798 
getStatus()799     Status getStatus() { return S; }
800 
addContent(std::unique_ptr<Entry> Content)801     void addContent(std::unique_ptr<Entry> Content) {
802       Contents.push_back(std::move(Content));
803     }
804 
getLastContent()805     Entry *getLastContent() const { return Contents.back().get(); }
806 
807     using iterator = decltype(Contents)::iterator;
808 
contents_begin()809     iterator contents_begin() { return Contents.begin(); }
contents_end()810     iterator contents_end() { return Contents.end(); }
811 
classof(const Entry * E)812     static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
813   };
814 
815   /// A file or directory in the vfs that is mapped to a file or directory in
816   /// the external filesystem.
817   class RemapEntry : public Entry {
818     std::string ExternalContentsPath;
819     NameKind UseName;
820 
821   protected:
RemapEntry(EntryKind K,StringRef Name,StringRef ExternalContentsPath,NameKind UseName)822     RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath,
823                NameKind UseName)
824         : Entry(K, Name), ExternalContentsPath(ExternalContentsPath),
825           UseName(UseName) {}
826 
827   public:
getExternalContentsPath()828     StringRef getExternalContentsPath() const { return ExternalContentsPath; }
829 
830     /// Whether to use the external path as the name for this file or directory.
useExternalName(bool GlobalUseExternalName)831     bool useExternalName(bool GlobalUseExternalName) const {
832       return UseName == NK_NotSet ? GlobalUseExternalName
833                                   : (UseName == NK_External);
834     }
835 
getUseName()836     NameKind getUseName() const { return UseName; }
837 
classof(const Entry * E)838     static bool classof(const Entry *E) {
839       switch (E->getKind()) {
840       case EK_DirectoryRemap:
841         [[fallthrough]];
842       case EK_File:
843         return true;
844       case EK_Directory:
845         return false;
846       }
847       llvm_unreachable("invalid entry kind");
848     }
849   };
850 
851   /// A directory in the vfs that maps to a directory in the external file
852   /// system.
853   class DirectoryRemapEntry : public RemapEntry {
854   public:
DirectoryRemapEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)855     DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath,
856                         NameKind UseName)
857         : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {}
858 
classof(const Entry * E)859     static bool classof(const Entry *E) {
860       return E->getKind() == EK_DirectoryRemap;
861     }
862   };
863 
864   /// A file in the vfs that maps to a file in the external file system.
865   class FileEntry : public RemapEntry {
866   public:
FileEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)867     FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName)
868         : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {}
869 
classof(const Entry * E)870     static bool classof(const Entry *E) { return E->getKind() == EK_File; }
871   };
872 
873   /// Represents the result of a path lookup into the RedirectingFileSystem.
874   struct LookupResult {
875     /// Chain of parent directory entries for \c E.
876     llvm::SmallVector<Entry *, 32> Parents;
877 
878     /// The entry the looked-up path corresponds to.
879     Entry *E;
880 
881   private:
882     /// When the found Entry is a DirectoryRemapEntry, stores the path in the
883     /// external file system that the looked-up path in the virtual file system
884     //  corresponds to.
885     std::optional<std::string> ExternalRedirect;
886 
887   public:
888     LookupResult(Entry *E, sys::path::const_iterator Start,
889                  sys::path::const_iterator End);
890 
891     /// If the found Entry maps the input path to a path in the external
892     /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns
893     /// that path.
getExternalRedirectLookupResult894     std::optional<StringRef> getExternalRedirect() const {
895       if (isa<DirectoryRemapEntry>(E))
896         return StringRef(*ExternalRedirect);
897       if (auto *FE = dyn_cast<FileEntry>(E))
898         return FE->getExternalContentsPath();
899       return std::nullopt;
900     }
901 
902     /// Get the (canonical) path of the found entry. This uses the as-written
903     /// path components from the VFS specification.
904     void getPath(llvm::SmallVectorImpl<char> &Path) const;
905   };
906 
907 private:
  // These classes are granted access to the private members of this class.
  friend class RedirectingFSDirIterImpl;
  friend class RedirectingFileSystemParser;

  /// Canonicalize \p Path by removing ".", ".." and "./" components. This is
  /// a VFS request, so do not bother about symlinks in the path components,
  /// but canonicalize in order to perform the correct entry search.
  std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;

  /// Get the File status, or error, from the underlying external file system.
  /// This returns the status with the originally requested name, while looking
  /// up the entry using the canonical path.
  ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
                                    const Twine &OriginalPath) const;

  /// Make \a Path an absolute path.
  ///
  /// Makes \a Path absolute using the \a WorkingDir if it is not already.
  ///
  /// /absolute/path   => /absolute/path
  /// relative/../path => <WorkingDir>/relative/../path
  ///
  /// \param WorkingDir  A path that will be used as the base Dir if \a Path
  ///                    is not already absolute.
  /// \param Path A path that is modified to be an absolute path.
  /// \returns success if \a Path has been made absolute, otherwise a
  ///          platform-specific error_code.
  std::error_code makeAbsolute(StringRef WorkingDir,
                               SmallVectorImpl<char> &Path) const;
936 
937   // In a RedirectingFileSystem, keys can be specified in Posix or Windows
938   // style (or even a mixture of both), so this comparison helper allows
939   // slashes (representing a root) to match backslashes (and vice versa).  Note
940   // that, other than the root, path components should not contain slashes or
941   // backslashes.
pathComponentMatches(llvm::StringRef lhs,llvm::StringRef rhs)942   bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
943     if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_insensitive(rhs)))
944       return true;
945     return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
946   }
947 
  /// The root(s) of the virtual file system.
  std::vector<std::unique_ptr<Entry>> Roots;

  /// The current working directory of the file system.
  std::string WorkingDirectory;

  /// The file system to use for external references.
  IntrusiveRefCntPtr<FileSystem> ExternalFS;

  /// The path of the directory in which the YAML file is located.
  /// This will be prefixed to each 'external-contents' if IsRelativeOverlay
  /// is set. This will also be prefixed to each 'roots->name' if RootRelative
  /// is set to RootRelativeKind::OverlayDir and the path is relative.
  std::string OverlayFileDir;

  /// @name Configuration
  /// @{

  /// Whether to perform case-sensitive comparisons.
  ///
  /// Currently, case-insensitive matching only works correctly with ASCII.
  bool CaseSensitive = is_style_posix(sys::path::Style::native);

  /// IsRelativeOverlay marks whether the OverlayFileDir path must
  /// be prefixed to every 'external-contents' when reading from YAML files.
  bool IsRelativeOverlay = false;

  /// Whether to use the value of 'external-contents' for the
  /// names of files.  This global value is overridable on a per-file basis.
  bool UseExternalNames = true;

  /// Determines the lookups to perform, as well as their order. See
  /// \c RedirectKind for details.
  RedirectKind Redirection = RedirectKind::Fallthrough;

  /// Determine the prefix directory if the roots are relative paths. See
  /// \c RootRelativeKind for details.
  RootRelativeKind RootRelative = RootRelativeKind::CWD;
  /// @}
987 
  /// Private constructor; the static \c create overloads are the public way
  /// to obtain an instance.
  RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);

  /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing
  /// into the contents of \p From if it is a directory. Returns a LookupResult
  /// giving the matched entry and, if that entry is a FileEntry or
  /// DirectoryRemapEntry, the path it redirects to in the external file system.
  ///
  /// NOTE(review): \p Entries is not described here; presumably it collects
  /// the chain of parent entries visited -- confirm against the definition.
  ErrorOr<LookupResult>
  lookupPathImpl(llvm::sys::path::const_iterator Start,
                 llvm::sys::path::const_iterator End, Entry *From,
                 llvm::SmallVectorImpl<Entry *> &Entries) const;

  /// Get the status for a path with the provided \c LookupResult.
  ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
                         const LookupResult &Result);
1002 
1003 public:
  /// Looks up \p Path in \c Roots and returns a LookupResult giving the
  /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry,
  /// the path it redirects to in the external file system.
  ErrorOr<LookupResult> lookupPath(StringRef Path) const;

  /// Parses \p Buffer, which is expected to be in YAML format and
  /// returns a virtual file system representing its contents.
  ///
  /// NOTE(review): presumably returns null when parsing fails, with problems
  /// reported through \p DiagHandler / \p DiagContext -- confirm against the
  /// definition.
  static std::unique_ptr<RedirectingFileSystem>
  create(std::unique_ptr<MemoryBuffer> Buffer,
         SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
         void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);

  /// Redirect each of the remapped files from first to second.
  ///
  /// Each element of \p RemappedFiles is a (first, second) pair of paths.
  static std::unique_ptr<RedirectingFileSystem>
  create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
         bool UseExternalNames, FileSystem &ExternalFS);
1020 
  // Overrides of the vfs::FileSystem interface.
  ErrorOr<Status> status(const Twine &Path) override;
  ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;

  std::error_code getRealPath(const Twine &Path,
                              SmallVectorImpl<char> &Output) const override;

  llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;

  std::error_code setCurrentWorkingDirectory(const Twine &Path) override;

  std::error_code isLocal(const Twine &Path, bool &Result) override;

  std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;

  directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;

  // Accessors for the overlay directory; see the OverlayFileDir member.
  void setOverlayFileDir(StringRef PrefixDir);

  StringRef getOverlayFileDir() const;

  /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly
  /// otherwise. Will be removed in the future, use \c setRedirection instead.
  void setFallthrough(bool Fallthrough);

  /// Sets the redirection kind; see \c RedirectKind for the available kinds.
  void setRedirection(RedirectingFileSystem::RedirectKind Kind);

  // NOTE(review): presumably returns the names of the entries in Roots --
  // confirm against the definition.
  std::vector<llvm::StringRef> getRoots() const;

  // NOTE(review): presumably prints \p E to \p OS at the given indentation
  // (\p IndentLevel defaults to 0) -- confirm against the definition.
  void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const;
1050 
1051 protected:
  // Override of the base-class printing hook.
  void printImpl(raw_ostream &OS, PrintType Type,
                 unsigned IndentLevel) const override;
1054 };
1055 
/// Collect all pairs of <virtual path, real path> entries from the
/// \p YAMLFilePath. This is used by the module dependency collector to forward
/// the entries into the reproducer output VFS YAML file.
///
/// \param CollectedEntries Output container for the collected entries.
/// \param DiagContext Defaults to nullptr when omitted.
/// \param ExternalFS Defaults to the result of getRealFileSystem() when
///                   omitted.
void collectVFSFromYAML(
    std::unique_ptr<llvm::MemoryBuffer> Buffer,
    llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
    SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
    void *DiagContext = nullptr,
    IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
1065 
1066 class YAMLVFSWriter {
1067   std::vector<YAMLVFSEntry> Mappings;
1068   std::optional<bool> IsCaseSensitive;
1069   std::optional<bool> IsOverlayRelative;
1070   std::optional<bool> UseExternalNames;
1071   std::string OverlayDir;
1072 
1073   void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory);
1074 
1075 public:
1076   YAMLVFSWriter() = default;
1077 
1078   void addFileMapping(StringRef VirtualPath, StringRef RealPath);
1079   void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath);
1080 
setCaseSensitivity(bool CaseSensitive)1081   void setCaseSensitivity(bool CaseSensitive) {
1082     IsCaseSensitive = CaseSensitive;
1083   }
1084 
setUseExternalNames(bool UseExtNames)1085   void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
1086 
setOverlayDir(StringRef OverlayDirectory)1087   void setOverlayDir(StringRef OverlayDirectory) {
1088     IsOverlayRelative = true;
1089     OverlayDir.assign(OverlayDirectory.str());
1090   }
1091 
getMappings()1092   const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
1093 
1094   void write(llvm::raw_ostream &OS);
1095 };
1096 
1097 } // namespace vfs
1098 } // namespace llvm
1099 
1100 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
1101