1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the virtual file system interface vfs::FileSystem.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
16 
17 #include "llvm/ADT/IntrusiveRefCntPtr.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Chrono.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include <cassert>
28 #include <cstdint>
29 #include <ctime>
30 #include <memory>
31 #include <stack>
32 #include <string>
33 #include <system_error>
34 #include <utility>
35 #include <vector>
36 
37 namespace llvm {
38 
39 class MemoryBuffer;
40 class MemoryBufferRef;
41 class Twine;
42 
43 namespace vfs {
44 
45 /// The result of a \p status operation.
46 class Status {
47   std::string Name;
48   llvm::sys::fs::UniqueID UID;
49   llvm::sys::TimePoint<> MTime;
50   uint32_t User;
51   uint32_t Group;
52   uint64_t Size;
53   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
54   llvm::sys::fs::perms Perms;
55 
56 public:
57   // FIXME: remove when files support multiple names
58   bool IsVFSMapped = false;
59 
60   Status() = default;
61   Status(const llvm::sys::fs::file_status &Status);
62   Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
63          llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
64          uint64_t Size, llvm::sys::fs::file_type Type,
65          llvm::sys::fs::perms Perms);
66 
67   /// Get a copy of a Status with a different name.
68   static Status copyWithNewName(const Status &In, const Twine &NewName);
69   static Status copyWithNewName(const llvm::sys::fs::file_status &In,
70                                 const Twine &NewName);
71 
72   /// Returns the name that should be used for this file or directory.
getName()73   StringRef getName() const { return Name; }
74 
75   /// @name Status interface from llvm::sys::fs
76   /// @{
getType()77   llvm::sys::fs::file_type getType() const { return Type; }
getPermissions()78   llvm::sys::fs::perms getPermissions() const { return Perms; }
getLastModificationTime()79   llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
getUniqueID()80   llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
getUser()81   uint32_t getUser() const { return User; }
getGroup()82   uint32_t getGroup() const { return Group; }
getSize()83   uint64_t getSize() const { return Size; }
84   /// @}
85   /// @name Status queries
86   /// These are static queries in llvm::sys::fs.
87   /// @{
88   bool equivalent(const Status &Other) const;
89   bool isDirectory() const;
90   bool isRegularFile() const;
91   bool isOther() const;
92   bool isSymlink() const;
93   bool isStatusKnown() const;
94   bool exists() const;
95   /// @}
96 };
97 
98 /// Represents an open file.
99 class File {
100 public:
101   /// Destroy the file after closing it (if open).
102   /// Sub-classes should generally call close() inside their destructors.  We
103   /// cannot do that from the base class, since close is virtual.
104   virtual ~File();
105 
106   /// Get the status of the file.
107   virtual llvm::ErrorOr<Status> status() = 0;
108 
109   /// Get the name of the file
getName()110   virtual llvm::ErrorOr<std::string> getName() {
111     if (auto Status = status())
112       return Status->getName().str();
113     else
114       return Status.getError();
115   }
116 
117   /// Get the contents of the file as a \p MemoryBuffer.
118   virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
119   getBuffer(const Twine &Name, int64_t FileSize = -1,
120             bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
121 
122   /// Closes the file.
123   virtual std::error_code close() = 0;
124 };
125 
126 /// A member of a directory, yielded by a directory_iterator.
127 /// Only information available on most platforms is included.
128 class directory_entry {
129   std::string Path;
130   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
131 
132 public:
133   directory_entry() = default;
directory_entry(std::string Path,llvm::sys::fs::file_type Type)134   directory_entry(std::string Path, llvm::sys::fs::file_type Type)
135       : Path(std::move(Path)), Type(Type) {}
136 
path()137   llvm::StringRef path() const { return Path; }
type()138   llvm::sys::fs::file_type type() const { return Type; }
139 };
140 
141 namespace detail {
142 
143 /// An interface for virtual file systems to provide an iterator over the
144 /// (non-recursive) contents of a directory.
145 struct DirIterImpl {
146   virtual ~DirIterImpl();
147 
148   /// Sets \c CurrentEntry to the next entry in the directory on success,
149   /// to directory_entry() at end,  or returns a system-defined \c error_code.
150   virtual std::error_code increment() = 0;
151 
152   directory_entry CurrentEntry;
153 };
154 
155 } // namespace detail
156 
157 /// An input iterator over the entries in a virtual path, similar to
158 /// llvm::sys::fs::directory_iterator.
159 class directory_iterator {
160   std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
161 
162 public:
directory_iterator(std::shared_ptr<detail::DirIterImpl> I)163   directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
164       : Impl(std::move(I)) {
165     assert(Impl.get() != nullptr && "requires non-null implementation");
166     if (Impl->CurrentEntry.path().empty())
167       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
168   }
169 
170   /// Construct an 'end' iterator.
171   directory_iterator() = default;
172 
173   /// Equivalent to operator++, with an error code.
increment(std::error_code & EC)174   directory_iterator &increment(std::error_code &EC) {
175     assert(Impl && "attempting to increment past end");
176     EC = Impl->increment();
177     if (Impl->CurrentEntry.path().empty())
178       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
179     return *this;
180   }
181 
182   const directory_entry &operator*() const { return Impl->CurrentEntry; }
183   const directory_entry *operator->() const { return &Impl->CurrentEntry; }
184 
185   bool operator==(const directory_iterator &RHS) const {
186     if (Impl && RHS.Impl)
187       return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
188     return !Impl && !RHS.Impl;
189   }
190   bool operator!=(const directory_iterator &RHS) const {
191     return !(*this == RHS);
192   }
193 };
194 
195 class FileSystem;
196 
197 namespace detail {
198 
199 /// Keeps state for the recursive_directory_iterator.
200 struct RecDirIterState {
201   std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
202   bool HasNoPushRequest = false;
203 };
204 
205 } // end namespace detail
206 
207 /// An input iterator over the recursive contents of a virtual path,
208 /// similar to llvm::sys::fs::recursive_directory_iterator.
209 class recursive_directory_iterator {
210   FileSystem *FS;
211   std::shared_ptr<detail::RecDirIterState>
212       State; // Input iterator semantics on copy.
213 
214 public:
215   recursive_directory_iterator(FileSystem &FS, const Twine &Path,
216                                std::error_code &EC);
217 
218   /// Construct an 'end' iterator.
219   recursive_directory_iterator() = default;
220 
221   /// Equivalent to operator++, with an error code.
222   recursive_directory_iterator &increment(std::error_code &EC);
223 
224   const directory_entry &operator*() const { return *State->Stack.top(); }
225   const directory_entry *operator->() const { return &*State->Stack.top(); }
226 
227   bool operator==(const recursive_directory_iterator &Other) const {
228     return State == Other.State; // identity
229   }
230   bool operator!=(const recursive_directory_iterator &RHS) const {
231     return !(*this == RHS);
232   }
233 
234   /// Gets the current level. Starting path is at level 0.
level()235   int level() const {
236     assert(!State->Stack.empty() &&
237            "Cannot get level without any iteration state");
238     return State->Stack.size() - 1;
239   }
240 
no_push()241   void no_push() { State->HasNoPushRequest = true; }
242 };
243 
244 /// The virtual file system interface.
245 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
246 public:
247   virtual ~FileSystem();
248 
249   /// Get the status of the entry at \p Path, if one exists.
250   virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
251 
252   /// Get a \p File object for the file at \p Path, if one exists.
253   virtual llvm::ErrorOr<std::unique_ptr<File>>
254   openFileForRead(const Twine &Path) = 0;
255 
256   /// This is a convenience method that opens a file, gets its content and then
257   /// closes the file.
258   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
259   getBufferForFile(const Twine &Name, int64_t FileSize = -1,
260                    bool RequiresNullTerminator = true, bool IsVolatile = false);
261 
262   /// Get a directory_iterator for \p Dir.
263   /// \note The 'end' iterator is directory_iterator().
264   virtual directory_iterator dir_begin(const Twine &Dir,
265                                        std::error_code &EC) = 0;
266 
267   /// Set the working directory. This will affect all following operations on
268   /// this file system and may propagate down for nested file systems.
269   virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
270 
271   /// Get the working directory of this file system.
272   virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
273 
274   /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve
275   /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`.
276   /// This returns errc::operation_not_permitted if not implemented by subclass.
277   virtual std::error_code getRealPath(const Twine &Path,
278                                       SmallVectorImpl<char> &Output) const;
279 
280   /// Check whether a file exists. Provided for convenience.
281   bool exists(const Twine &Path);
282 
283   /// Is the file mounted on a local filesystem?
284   virtual std::error_code isLocal(const Twine &Path, bool &Result);
285 
286   /// Make \a Path an absolute path.
287   ///
288   /// Makes \a Path absolute using the current directory if it is not already.
289   /// An empty \a Path will result in the current directory.
290   ///
291   /// /absolute/path   => /absolute/path
292   /// relative/../path => <current-directory>/relative/../path
293   ///
294   /// \param Path A path that is modified to be an absolute path.
295   /// \returns success if \a path has been made absolute, otherwise a
296   ///          platform-specific error_code.
297   virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
298 };
299 
300 /// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
301 /// the operating system.
302 /// The working directory is linked to the process's working directory.
303 /// (This is usually thread-hostile).
304 IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
305 
306 /// Create an \p vfs::FileSystem for the 'real' file system, as seen by
307 /// the operating system.
308 /// It has its own working directory, independent of (but initially equal to)
309 /// that of the process.
310 std::unique_ptr<FileSystem> createPhysicalFileSystem();
311 
312 /// A file system that allows overlaying one \p AbstractFileSystem on top
313 /// of another.
314 ///
315 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
316 /// one merged file system. When there is a directory that exists in more than
317 /// one file system, the \p OverlayFileSystem contains a directory containing
318 /// the union of their contents.  The attributes (permissions, etc.) of the
319 /// top-most (most recently added) directory are used.  When there is a file
320 /// that exists in more than one file system, the file in the top-most file
321 /// system overrides the other(s).
322 class OverlayFileSystem : public FileSystem {
323   using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
324 
325   /// The stack of file systems, implemented as a list in order of
326   /// their addition.
327   FileSystemList FSList;
328 
329 public:
330   OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
331 
332   /// Pushes a file system on top of the stack.
333   void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
334 
335   llvm::ErrorOr<Status> status(const Twine &Path) override;
336   llvm::ErrorOr<std::unique_ptr<File>>
337   openFileForRead(const Twine &Path) override;
338   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
339   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
340   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
341   std::error_code isLocal(const Twine &Path, bool &Result) override;
342   std::error_code getRealPath(const Twine &Path,
343                               SmallVectorImpl<char> &Output) const override;
344 
345   using iterator = FileSystemList::reverse_iterator;
346   using const_iterator = FileSystemList::const_reverse_iterator;
347   using reverse_iterator = FileSystemList::iterator;
348   using const_reverse_iterator = FileSystemList::const_iterator;
349 
350   /// Get an iterator pointing to the most recently added file system.
overlays_begin()351   iterator overlays_begin() { return FSList.rbegin(); }
overlays_begin()352   const_iterator overlays_begin() const { return FSList.rbegin(); }
353 
354   /// Get an iterator pointing one-past the least recently added file system.
overlays_end()355   iterator overlays_end() { return FSList.rend(); }
overlays_end()356   const_iterator overlays_end() const { return FSList.rend(); }
357 
358   /// Get an iterator pointing to the least recently added file system.
overlays_rbegin()359   reverse_iterator overlays_rbegin() { return FSList.begin(); }
overlays_rbegin()360   const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
361 
362   /// Get an iterator pointing one-past the most recently added file system.
overlays_rend()363   reverse_iterator overlays_rend() { return FSList.end(); }
overlays_rend()364   const_reverse_iterator overlays_rend() const { return FSList.end(); }
365 };
366 
367 /// By default, this delegates all calls to the underlying file system. This
368 /// is useful when derived file systems want to override some calls and still
369 /// proxy other calls.
370 class ProxyFileSystem : public FileSystem {
371 public:
ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)372   explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
373       : FS(std::move(FS)) {}
374 
status(const Twine & Path)375   llvm::ErrorOr<Status> status(const Twine &Path) override {
376     return FS->status(Path);
377   }
378   llvm::ErrorOr<std::unique_ptr<File>>
openFileForRead(const Twine & Path)379   openFileForRead(const Twine &Path) override {
380     return FS->openFileForRead(Path);
381   }
dir_begin(const Twine & Dir,std::error_code & EC)382   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
383     return FS->dir_begin(Dir, EC);
384   }
getCurrentWorkingDirectory()385   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
386     return FS->getCurrentWorkingDirectory();
387   }
setCurrentWorkingDirectory(const Twine & Path)388   std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
389     return FS->setCurrentWorkingDirectory(Path);
390   }
getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)391   std::error_code getRealPath(const Twine &Path,
392                               SmallVectorImpl<char> &Output) const override {
393     return FS->getRealPath(Path, Output);
394   }
isLocal(const Twine & Path,bool & Result)395   std::error_code isLocal(const Twine &Path, bool &Result) override {
396     return FS->isLocal(Path, Result);
397   }
398 
399 protected:
getUnderlyingFS()400   FileSystem &getUnderlyingFS() { return *FS; }
401 
402 private:
403   IntrusiveRefCntPtr<FileSystem> FS;
404 
405   virtual void anchor();
406 };
407 
408 namespace detail {
409 
410 class InMemoryDirectory;
411 class InMemoryFile;
412 
413 } // namespace detail
414 
415 /// An in-memory file system.
416 class InMemoryFileSystem : public FileSystem {
417   std::unique_ptr<detail::InMemoryDirectory> Root;
418   std::string WorkingDirectory;
419   bool UseNormalizedPaths = true;
420 
421   /// If HardLinkTarget is non-null, a hardlink is created to the To path which
422   /// must be a file. If it is null then it adds the file as the public addFile.
423   bool addFile(const Twine &Path, time_t ModificationTime,
424                std::unique_ptr<llvm::MemoryBuffer> Buffer,
425                Optional<uint32_t> User, Optional<uint32_t> Group,
426                Optional<llvm::sys::fs::file_type> Type,
427                Optional<llvm::sys::fs::perms> Perms,
428                const detail::InMemoryFile *HardLinkTarget);
429 
430 public:
431   explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
432   ~InMemoryFileSystem() override;
433 
434   /// Add a file containing a buffer or a directory to the VFS with a
435   /// path. The VFS owns the buffer.  If present, User, Group, Type
436   /// and Perms apply to the newly-created file or directory.
437   /// \return true if the file or directory was successfully added,
438   /// false if the file or directory already exists in the file system with
439   /// different contents.
440   bool addFile(const Twine &Path, time_t ModificationTime,
441                std::unique_ptr<llvm::MemoryBuffer> Buffer,
442                Optional<uint32_t> User = None, Optional<uint32_t> Group = None,
443                Optional<llvm::sys::fs::file_type> Type = None,
444                Optional<llvm::sys::fs::perms> Perms = None);
445 
446   /// Add a hard link to a file.
447   /// Here hard links are not intended to be fully equivalent to the classical
448   /// filesystem. Both the hard link and the file share the same buffer and
449   /// status (and thus have the same UniqueID). Because of this there is no way
450   /// to distinguish between the link and the file after the link has been
451   /// added.
452   ///
453   /// The To path must be an existing file or a hardlink. The From file must not
454   /// have been added before. The To Path must not be a directory. The From Node
455   /// is added as a hard link which points to the resolved file of To Node.
456   /// \return true if the above condition is satisfied and hardlink was
457   /// successfully created, false otherwise.
458   bool addHardLink(const Twine &From, const Twine &To);
459 
460   /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
461   /// If present, User, Group, Type and Perms apply to the newly-created file
462   /// or directory.
463   /// \return true if the file or directory was successfully added,
464   /// false if the file or directory already exists in the file system with
465   /// different contents.
466   bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
467                     const llvm::MemoryBufferRef &Buffer,
468                     Optional<uint32_t> User = None,
469                     Optional<uint32_t> Group = None,
470                     Optional<llvm::sys::fs::file_type> Type = None,
471                     Optional<llvm::sys::fs::perms> Perms = None);
472 
473   std::string toString() const;
474 
475   /// Return true if this file system normalizes . and .. in paths.
useNormalizedPaths()476   bool useNormalizedPaths() const { return UseNormalizedPaths; }
477 
478   llvm::ErrorOr<Status> status(const Twine &Path) override;
479   llvm::ErrorOr<std::unique_ptr<File>>
480   openFileForRead(const Twine &Path) override;
481   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
482 
getCurrentWorkingDirectory()483   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
484     return WorkingDirectory;
485   }
486   /// Canonicalizes \p Path by combining with the current working
487   /// directory and normalizing the path (e.g. remove dots). If the current
488   /// working directory is not set, this returns errc::operation_not_permitted.
489   ///
490   /// This doesn't resolve symlinks as they are not supported in in-memory file
491   /// system.
492   std::error_code getRealPath(const Twine &Path,
493                               SmallVectorImpl<char> &Output) const override;
494   std::error_code isLocal(const Twine &Path, bool &Result) override;
495   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
496 };
497 
498 /// Get a globally unique ID for a virtual file or directory.
499 llvm::sys::fs::UniqueID getNextVirtualUniqueID();
500 
501 /// Gets a \p FileSystem for a virtual file system described in YAML
502 /// format.
503 std::unique_ptr<FileSystem>
504 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
505                llvm::SourceMgr::DiagHandlerTy DiagHandler,
506                StringRef YAMLFilePath, void *DiagContext = nullptr,
507                IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
508 
/// A pair of path strings (\c VPath and \c RPath) plus a flag telling whether
/// the entry is a directory.
// NOTE(review): presumably VPath is the virtual path and RPath the real
// (external) path it maps to - confirm against the YAML writer.
struct YAMLVFSEntry {
  std::string VPath;
  std::string RPath;
  bool IsDirectory = false;

  /// Builds an entry by forwarding \p V and \p R into the two path strings;
  /// \p IsDir defaults to false.
  template <typename T1, typename T2>
  YAMLVFSEntry(T1 &&V, T2 &&R, bool IsDir = false)
      : VPath(std::forward<T1>(V)), RPath(std::forward<T2>(R)),
        IsDirectory(IsDir) {}
};
518 
519 class RedirectingFSDirIterImpl;
520 class RedirectingFileSystemParser;
521 
522 /// A virtual file system parsed from a YAML file.
523 ///
524 /// Currently, this class allows creating virtual files and directories. Virtual
525 /// files map to existing external files in \c ExternalFS, and virtual
526 /// directories may either map to existing directories in \c ExternalFS or list
527 /// their contents in the form of other virtual directories and/or files.
528 ///
529 /// The basic structure of the parsed file is:
530 /// \verbatim
531 /// {
532 ///   'version': <version number>,
533 ///   <optional configuration>
534 ///   'roots': [
535 ///              <directory entries>
536 ///            ]
537 /// }
538 /// \endverbatim
539 ///
540 /// All configuration options are optional.
541 ///   'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
542 ///   'use-external-names': <boolean, default=true>
543 ///   'overlay-relative': <boolean, default=false>
544 ///   'fallthrough': <boolean, default=true>
545 ///
546 /// Virtual directories that list their contents are represented as
547 /// \verbatim
548 /// {
549 ///   'type': 'directory',
550 ///   'name': <string>,
551 ///   'contents': [ <file or directory entries> ]
552 /// }
553 /// \endverbatim
554 ///
555 /// The default attributes for such virtual directories are:
556 /// \verbatim
557 /// MTime = now() when created
558 /// Perms = 0777
559 /// User = Group = 0
560 /// Size = 0
561 /// UniqueID = unspecified unique value
562 /// \endverbatim
563 ///
564 /// When a path prefix matches such a directory, the next component in the path
565 /// is matched against the entries in the 'contents' array.
566 ///
567 /// Re-mapped directories, on the other hand, are represented as
/// \verbatim
569 /// {
570 ///   'type': 'directory-remap',
571 ///   'name': <string>,
572 ///   'use-external-name': <boolean>, # Optional
573 ///   'external-contents': <path to external directory>
574 /// }
575 /// \endverbatim
576 ///
577 /// and inherit their attributes from the external directory. When a path
578 /// prefix matches such an entry, the unmatched components are appended to the
579 /// 'external-contents' path, and the resulting path is looked up in the
580 /// external file system instead.
581 ///
582 /// Re-mapped files are represented as
583 /// \verbatim
584 /// {
585 ///   'type': 'file',
586 ///   'name': <string>,
587 ///   'use-external-name': <boolean>, # Optional
588 ///   'external-contents': <path to external file>
589 /// }
590 /// \endverbatim
591 ///
592 /// Their attributes and file contents are determined by looking up the file at
593 /// their 'external-contents' path in the external file system.
594 ///
595 /// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
596 /// contain multiple path components (e.g. /path/to/file). However, any
597 /// directory in such a path that contains more than one child must be uniquely
598 /// represented by a 'directory' entry.
599 class RedirectingFileSystem : public vfs::FileSystem {
600 public:
601   enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
602   enum NameKind { NK_NotSet, NK_External, NK_Virtual };
603 
604   /// A single file or directory in the VFS.
605   class Entry {
606     EntryKind Kind;
607     std::string Name;
608 
609   public:
Entry(EntryKind K,StringRef Name)610     Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
611     virtual ~Entry() = default;
612 
getName()613     StringRef getName() const { return Name; }
getKind()614     EntryKind getKind() const { return Kind; }
615   };
616 
617   /// A directory in the vfs with explicitly specified contents.
618   class DirectoryEntry : public Entry {
619     std::vector<std::unique_ptr<Entry>> Contents;
620     Status S;
621 
622   public:
623     /// Constructs a directory entry with explicitly specified contents.
DirectoryEntry(StringRef Name,std::vector<std::unique_ptr<Entry>> Contents,Status S)624     DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents,
625                    Status S)
626         : Entry(EK_Directory, Name), Contents(std::move(Contents)),
627           S(std::move(S)) {}
628 
629     /// Constructs an empty directory entry.
DirectoryEntry(StringRef Name,Status S)630     DirectoryEntry(StringRef Name, Status S)
631         : Entry(EK_Directory, Name), S(std::move(S)) {}
632 
getStatus()633     Status getStatus() { return S; }
634 
addContent(std::unique_ptr<Entry> Content)635     void addContent(std::unique_ptr<Entry> Content) {
636       Contents.push_back(std::move(Content));
637     }
638 
getLastContent()639     Entry *getLastContent() const { return Contents.back().get(); }
640 
641     using iterator = decltype(Contents)::iterator;
642 
contents_begin()643     iterator contents_begin() { return Contents.begin(); }
contents_end()644     iterator contents_end() { return Contents.end(); }
645 
classof(const Entry * E)646     static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
647   };
648 
649   /// A file or directory in the vfs that is mapped to a file or directory in
650   /// the external filesystem.
651   class RemapEntry : public Entry {
652     std::string ExternalContentsPath;
653     NameKind UseName;
654 
655   protected:
RemapEntry(EntryKind K,StringRef Name,StringRef ExternalContentsPath,NameKind UseName)656     RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath,
657                NameKind UseName)
658         : Entry(K, Name), ExternalContentsPath(ExternalContentsPath),
659           UseName(UseName) {}
660 
661   public:
getExternalContentsPath()662     StringRef getExternalContentsPath() const { return ExternalContentsPath; }
663 
664     /// Whether to use the external path as the name for this file or directory.
useExternalName(bool GlobalUseExternalName)665     bool useExternalName(bool GlobalUseExternalName) const {
666       return UseName == NK_NotSet ? GlobalUseExternalName
667                                   : (UseName == NK_External);
668     }
669 
getUseName()670     NameKind getUseName() const { return UseName; }
671 
classof(const Entry * E)672     static bool classof(const Entry *E) {
673       switch (E->getKind()) {
674       case EK_DirectoryRemap:
675         LLVM_FALLTHROUGH;
676       case EK_File:
677         return true;
678       case EK_Directory:
679         return false;
680       }
681       llvm_unreachable("invalid entry kind");
682     }
683   };
684 
685   /// A directory in the vfs that maps to a directory in the external file
686   /// system.
687   class DirectoryRemapEntry : public RemapEntry {
688   public:
DirectoryRemapEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)689     DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath,
690                         NameKind UseName)
691         : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {}
692 
classof(const Entry * E)693     static bool classof(const Entry *E) {
694       return E->getKind() == EK_DirectoryRemap;
695     }
696   };
697 
698   /// A file in the vfs that maps to a file in the external file system.
699   class FileEntry : public RemapEntry {
700   public:
FileEntry(StringRef Name,StringRef ExternalContentsPath,NameKind UseName)701     FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName)
702         : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {}
703 
classof(const Entry * E)704     static bool classof(const Entry *E) { return E->getKind() == EK_File; }
705   };
706 
707   /// Represents the result of a path lookup into the RedirectingFileSystem.
708   struct LookupResult {
709     /// The entry the looked-up path corresponds to.
710     Entry *E;
711 
712   private:
713     /// When the found Entry is a DirectoryRemapEntry, stores the path in the
714     /// external file system that the looked-up path in the virtual file system
715     //  corresponds to.
716     Optional<std::string> ExternalRedirect;
717 
718   public:
719     LookupResult(Entry *E, sys::path::const_iterator Start,
720                  sys::path::const_iterator End);
721 
722     /// If the found Entry maps the the input path to a path in the external
723     /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns
724     /// that path.
getExternalRedirectLookupResult725     Optional<StringRef> getExternalRedirect() const {
726       if (isa<DirectoryRemapEntry>(E))
727         return StringRef(*ExternalRedirect);
728       if (auto *FE = dyn_cast<FileEntry>(E))
729         return FE->getExternalContentsPath();
730       return None;
731     }
732   };
733 
734 private:
735   friend class RedirectingFSDirIterImpl;
736   friend class RedirectingFileSystemParser;
737 
shouldUseExternalFS()738   bool shouldUseExternalFS() const { return IsFallthrough; }
739 
740   /// Canonicalize path by removing ".", "..", "./", components. This is
741   /// a VFS request, do not bother about symlinks in the path components
742   /// but canonicalize in order to perform the correct entry search.
743   std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
744 
745   /// Whether to fall back to the external file system when an operation fails
746   /// with the given error code on a path associated with the provided Entry.
747   bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const;
748 
749   // In a RedirectingFileSystem, keys can be specified in Posix or Windows
750   // style (or even a mixture of both), so this comparison helper allows
751   // slashes (representing a root) to match backslashes (and vice versa).  Note
752   // that, other than the root, path components should not contain slashes or
753   // backslashes.
pathComponentMatches(llvm::StringRef lhs,llvm::StringRef rhs)754   bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
755     if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_insensitive(rhs)))
756       return true;
757     return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
758   }
759 
  /// The root(s) of the virtual file system. Each root Entry is owned by
  /// this vector through a std::unique_ptr.
  std::vector<std::unique_ptr<Entry>> Roots;

  /// The current working directory of the file system.
  std::string WorkingDirectory;

  /// The file system to use for external references, held through a
  /// reference-counting IntrusiveRefCntPtr.
  IntrusiveRefCntPtr<FileSystem> ExternalFS;

  /// If IsRelativeOverlay is set, this represents the directory
  /// path that should be prefixed to each 'external-contents' entry
  /// when reading from YAML files.
  std::string ExternalContentsPrefixDir;
773 
  /// @name Configuration
  /// @{

  /// Whether to perform case-sensitive comparisons.
  ///
  /// Defaults to false when \c _WIN32 is defined and to true otherwise.
  /// Currently, case-insensitive matching only works correctly with ASCII.
  bool CaseSensitive =
#ifdef _WIN32
      false;
#else
      true;
#endif

  /// IsRelativeOverlay marks whether an ExternalContentsPrefixDir path must
  /// be prefixed in every 'external-contents' when reading from YAML files.
  /// Defaults to false.
  bool IsRelativeOverlay = false;

  /// Whether to use the value of 'external-contents' for the
  /// names of files.  This global value is overridable on a per-file basis.
  /// Defaults to true.
  bool UseExternalNames = true;

  /// Whether to attempt a file lookup in external file system after it wasn't
  /// found in VFS. Defaults to true.
  bool IsFallthrough = true;
  /// @}
799 
  /// Constructs a RedirectingFileSystem from \p ExternalFS (presumably stored
  /// as the file system for external references; the definition is out of
  /// line). Declared in the private section, so it is only callable from
  /// members and friends of this class.
  RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
801 
  /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing
  /// into the contents of \p From if it is a directory. Returns a LookupResult
  /// giving the matched entry and, if that entry is a FileEntry or
  /// DirectoryRemapEntry, the path it redirects to in the external file system.
  ///
  /// \param Start iterator to the first path component to look up.
  /// \param End iterator one past the last path component to look up.
  /// \param From the entry in which the lookup is performed.
  /// \returns the LookupResult, or an error code in the ErrorOr on failure.
  ErrorOr<LookupResult> lookupPathImpl(llvm::sys::path::const_iterator Start,
                                       llvm::sys::path::const_iterator End,
                                       Entry *From) const;
809 
  /// Get the status for a path with the provided \c LookupResult.
  ///
  /// Private overload of status() that takes a lookup result the caller has
  /// already obtained.
  ///
  /// \param Path the path whose status is requested.
  /// \param Result the LookupResult associated with \p Path.
  /// \returns the Status, or an error code in the ErrorOr on failure.
  ErrorOr<Status> status(const Twine &Path, const LookupResult &Result);
812 
813 public:
  /// Looks up \p Path in \c Roots and returns a LookupResult giving the
  /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry,
  /// the path it redirects to in the external file system.
  ///
  /// \param Path the path to look up.
  /// \returns the LookupResult, or an error code in the ErrorOr on failure.
  ErrorOr<LookupResult> lookupPath(StringRef Path) const;
818 
  /// Parses \p Buffer, which is expected to be in YAML format and
  /// returns a virtual file system representing its contents.
  ///
  /// \param Buffer the YAML input; ownership is transferred to this call.
  /// \param DiagHandler diagnostic handler of type SourceMgr::DiagHandlerTy
  ///        (presumably used to report parse problems).
  /// \param YAMLFilePath path associated with the YAML input.
  /// \param DiagContext opaque pointer accompanying \p DiagHandler
  ///        (presumably forwarded to it).
  /// \param ExternalFS the file system to use for external references.
  static std::unique_ptr<RedirectingFileSystem>
  create(std::unique_ptr<MemoryBuffer> Buffer,
         SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
         void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
825 
  /// Redirect each of the remapped files from first to second.
  ///
  /// \param RemappedFiles pairs of paths; the first element of each pair is
  ///        redirected to the second element.
  /// \param UseExternalNames presumably the value for the UseExternalNames
  ///        setting of the created file system (definition is out of line).
  /// \param ExternalFS the external file system, taken by reference.
  static std::unique_ptr<RedirectingFileSystem>
  create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
         bool UseExternalNames, FileSystem &ExternalFS);
830 
  // The declarations below are all marked \c override: they replace virtual
  // functions inherited from the base class. Their definitions are out of
  // line.

  /// Returns the Status for \p Path, or an error code on failure.
  ErrorOr<Status> status(const Twine &Path) override;
  /// Returns a File opened for reading at \p Path, or an error code on
  /// failure.
  ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;

  /// Computes the real path for \p Path; \p Output is the buffer passed for
  /// the result (presumably filled on success).
  std::error_code getRealPath(const Twine &Path,
                              SmallVectorImpl<char> &Output) const override;

  /// Returns the current working directory, or an error code on failure.
  llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;

  /// Sets the current working directory to \p Path.
  std::error_code setCurrentWorkingDirectory(const Twine &Path) override;

  /// Reports through \p Result whether \p Path is local (exact meaning is
  /// defined by the base-class contract, not shown here).
  std::error_code isLocal(const Twine &Path, bool &Result) override;

  /// Makes \p Path absolute; the buffer is taken by non-const reference, so
  /// it is presumably rewritten in place.
  std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;

  /// Returns a directory_iterator for \p Dir; errors are reported through
  /// \p EC.
  directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
846 
  /// Sets the external-contents prefix directory to \p PrefixDir (presumably
  /// the ExternalContentsPrefixDir member; definition is out of line).
  void setExternalContentsPrefixDir(StringRef PrefixDir);

  /// Returns the external-contents prefix directory (presumably the
  /// ExternalContentsPrefixDir member; definition is out of line).
  StringRef getExternalContentsPrefixDir() const;

  /// Sets the fallthrough behaviour to \p Fallthrough (presumably the
  /// IsFallthrough member; definition is out of line).
  void setFallthrough(bool Fallthrough);

  /// Returns a list of StringRefs describing the roots (presumably derived
  /// from the \c Roots member; definition is out of line).
  std::vector<llvm::StringRef> getRoots() const;
854 
  /// Dumps this file system to \p OS.
  void dump(raw_ostream &OS) const;
  /// Dumps the entry \p E to \p OS. \p NumSpaces defaults to 0 and is
  /// presumably the indentation used for the output.
  void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const;
  // The argument-less dump() is only declared when NDEBUG is not defined or
  // LLVM_ENABLE_DUMP is defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void dump() const;
#endif
860 };
861 
/// Collect all pairs of <virtual path, real path> entries from the
/// \p YAMLFilePath. This is used by the module dependency collector to forward
/// the entries into the reproducer output VFS YAML file.
///
/// \param Buffer the YAML input; ownership is transferred to this call.
/// \param DiagHandler diagnostic handler of type SourceMgr::DiagHandlerTy.
/// \param YAMLFilePath path of the YAML file the entries come from.
/// \param CollectedEntries output container that receives the entries.
/// \param DiagContext opaque pointer accompanying \p DiagHandler; defaults to
///        nullptr.
/// \param ExternalFS external file system; defaults to the result of
///        getRealFileSystem().
void collectVFSFromYAML(
    std::unique_ptr<llvm::MemoryBuffer> Buffer,
    llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
    SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
    void *DiagContext = nullptr,
    IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
871 
872 class YAMLVFSWriter {
873   std::vector<YAMLVFSEntry> Mappings;
874   Optional<bool> IsCaseSensitive;
875   Optional<bool> IsOverlayRelative;
876   Optional<bool> UseExternalNames;
877   std::string OverlayDir;
878 
879   void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory);
880 
881 public:
882   YAMLVFSWriter() = default;
883 
884   void addFileMapping(StringRef VirtualPath, StringRef RealPath);
885   void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath);
886 
setCaseSensitivity(bool CaseSensitive)887   void setCaseSensitivity(bool CaseSensitive) {
888     IsCaseSensitive = CaseSensitive;
889   }
890 
setUseExternalNames(bool UseExtNames)891   void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
892 
setOverlayDir(StringRef OverlayDirectory)893   void setOverlayDir(StringRef OverlayDirectory) {
894     IsOverlayRelative = true;
895     OverlayDir.assign(OverlayDirectory.str());
896   }
897 
getMappings()898   const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
899 
900   void write(llvm::raw_ostream &OS);
901 };
902 
903 } // namespace vfs
904 } // namespace llvm
905 
906 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
907