1 //===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the virtual file system interface vfs::FileSystem.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_VIRTUALFILESYSTEM_H
15 #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H
16 
17 #include "llvm/ADT/IntrusiveRefCntPtr.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/STLFunctionalExtras.h"
23 #include "llvm/Support/Chrono.h"
24 #include "llvm/Support/ErrorOr.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include <cassert>
30 #include <cstdint>
31 #include <ctime>
32 #include <memory>
33 #include <stack>
34 #include <string>
35 #include <system_error>
36 #include <utility>
37 #include <vector>
38 
39 namespace llvm {
40 
41 class MemoryBuffer;
42 class MemoryBufferRef;
43 class Twine;
44 
45 namespace vfs {
46 
47 /// The result of a \p status operation.
48 class Status {
49   std::string Name;
50   llvm::sys::fs::UniqueID UID;
51   llvm::sys::TimePoint<> MTime;
52   uint32_t User;
53   uint32_t Group;
54   uint64_t Size;
55   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::status_error;
56   llvm::sys::fs::perms Perms;
57 
58 public:
59   // FIXME: remove when files support multiple names
60   bool IsVFSMapped = false;
61 
62   /// Whether this entity has an external path different from the virtual path,
63   /// and the external path is exposed by leaking it through the abstraction.
64   /// For example, a RedirectingFileSystem will set this for paths where
65   /// UseExternalName is true.
66   ///
67   /// FIXME: Currently the external path is exposed by replacing the virtual
68   /// path in this Status object. Instead, we should leave the path in the
69   /// Status intact (matching the requested virtual path) - see
70   /// FileManager::getFileRef for how how we plan to fix this.
71   bool ExposesExternalVFSPath = false;
72 
73   Status() = default;
74   Status(const llvm::sys::fs::file_status &Status);
75   Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
76          llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
77          uint64_t Size, llvm::sys::fs::file_type Type,
78          llvm::sys::fs::perms Perms);
79 
80   /// Get a copy of a Status with a different size.
81   static Status copyWithNewSize(const Status &In, uint64_t NewSize);
82   /// Get a copy of a Status with a different name.
83   static Status copyWithNewName(const Status &In, const Twine &NewName);
84   static Status copyWithNewName(const llvm::sys::fs::file_status &In,
85                                 const Twine &NewName);
86 
87   /// Returns the name that should be used for this file or directory.
88   StringRef getName() const { return Name; }
89 
90   /// @name Status interface from llvm::sys::fs
91   /// @{
92   llvm::sys::fs::file_type getType() const { return Type; }
93   llvm::sys::fs::perms getPermissions() const { return Perms; }
94   llvm::sys::TimePoint<> getLastModificationTime() const { return MTime; }
95   llvm::sys::fs::UniqueID getUniqueID() const { return UID; }
96   uint32_t getUser() const { return User; }
97   uint32_t getGroup() const { return Group; }
98   uint64_t getSize() const { return Size; }
99   /// @}
100   /// @name Status queries
101   /// These are static queries in llvm::sys::fs.
102   /// @{
103   bool equivalent(const Status &Other) const;
104   bool isDirectory() const;
105   bool isRegularFile() const;
106   bool isOther() const;
107   bool isSymlink() const;
108   bool isStatusKnown() const;
109   bool exists() const;
110   /// @}
111 };
112 
113 /// Represents an open file.
114 class File {
115 public:
116   /// Destroy the file after closing it (if open).
117   /// Sub-classes should generally call close() inside their destructors.  We
118   /// cannot do that from the base class, since close is virtual.
119   virtual ~File();
120 
121   /// Get the status of the file.
122   virtual llvm::ErrorOr<Status> status() = 0;
123 
124   /// Get the name of the file
125   virtual llvm::ErrorOr<std::string> getName() {
126     if (auto Status = status())
127       return Status->getName().str();
128     else
129       return Status.getError();
130   }
131 
132   /// Get the contents of the file as a \p MemoryBuffer.
133   virtual llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
134   getBuffer(const Twine &Name, int64_t FileSize = -1,
135             bool RequiresNullTerminator = true, bool IsVolatile = false) = 0;
136 
137   /// Closes the file.
138   virtual std::error_code close() = 0;
139 
140   // Get the same file with a different path.
141   static ErrorOr<std::unique_ptr<File>>
142   getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
143 
144 protected:
145   // Set the file's underlying path.
146   virtual void setPath(const Twine &Path) {}
147 };
148 
149 /// A member of a directory, yielded by a directory_iterator.
150 /// Only information available on most platforms is included.
151 class directory_entry {
152   std::string Path;
153   llvm::sys::fs::file_type Type = llvm::sys::fs::file_type::type_unknown;
154 
155 public:
156   directory_entry() = default;
157   directory_entry(std::string Path, llvm::sys::fs::file_type Type)
158       : Path(std::move(Path)), Type(Type) {}
159 
160   llvm::StringRef path() const { return Path; }
161   llvm::sys::fs::file_type type() const { return Type; }
162 };
163 
164 namespace detail {
165 
166 /// An interface for virtual file systems to provide an iterator over the
167 /// (non-recursive) contents of a directory.
168 struct DirIterImpl {
169   virtual ~DirIterImpl();
170 
171   /// Sets \c CurrentEntry to the next entry in the directory on success,
172   /// to directory_entry() at end,  or returns a system-defined \c error_code.
173   virtual std::error_code increment() = 0;
174 
175   directory_entry CurrentEntry;
176 };
177 
178 } // namespace detail
179 
180 /// An input iterator over the entries in a virtual path, similar to
181 /// llvm::sys::fs::directory_iterator.
182 class directory_iterator {
183   std::shared_ptr<detail::DirIterImpl> Impl; // Input iterator semantics on copy
184 
185 public:
186   directory_iterator(std::shared_ptr<detail::DirIterImpl> I)
187       : Impl(std::move(I)) {
188     assert(Impl.get() != nullptr && "requires non-null implementation");
189     if (Impl->CurrentEntry.path().empty())
190       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
191   }
192 
193   /// Construct an 'end' iterator.
194   directory_iterator() = default;
195 
196   /// Equivalent to operator++, with an error code.
197   directory_iterator &increment(std::error_code &EC) {
198     assert(Impl && "attempting to increment past end");
199     EC = Impl->increment();
200     if (Impl->CurrentEntry.path().empty())
201       Impl.reset(); // Normalize the end iterator to Impl == nullptr.
202     return *this;
203   }
204 
205   const directory_entry &operator*() const { return Impl->CurrentEntry; }
206   const directory_entry *operator->() const { return &Impl->CurrentEntry; }
207 
208   bool operator==(const directory_iterator &RHS) const {
209     if (Impl && RHS.Impl)
210       return Impl->CurrentEntry.path() == RHS.Impl->CurrentEntry.path();
211     return !Impl && !RHS.Impl;
212   }
213   bool operator!=(const directory_iterator &RHS) const {
214     return !(*this == RHS);
215   }
216 };
217 
218 class FileSystem;
219 
220 namespace detail {
221 
222 /// Keeps state for the recursive_directory_iterator.
223 struct RecDirIterState {
224   std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
225   bool HasNoPushRequest = false;
226 };
227 
228 } // end namespace detail
229 
230 /// An input iterator over the recursive contents of a virtual path,
231 /// similar to llvm::sys::fs::recursive_directory_iterator.
232 class recursive_directory_iterator {
233   FileSystem *FS;
234   std::shared_ptr<detail::RecDirIterState>
235       State; // Input iterator semantics on copy.
236 
237 public:
238   recursive_directory_iterator(FileSystem &FS, const Twine &Path,
239                                std::error_code &EC);
240 
241   /// Construct an 'end' iterator.
242   recursive_directory_iterator() = default;
243 
244   /// Equivalent to operator++, with an error code.
245   recursive_directory_iterator &increment(std::error_code &EC);
246 
247   const directory_entry &operator*() const { return *State->Stack.top(); }
248   const directory_entry *operator->() const { return &*State->Stack.top(); }
249 
250   bool operator==(const recursive_directory_iterator &Other) const {
251     return State == Other.State; // identity
252   }
253   bool operator!=(const recursive_directory_iterator &RHS) const {
254     return !(*this == RHS);
255   }
256 
257   /// Gets the current level. Starting path is at level 0.
258   int level() const {
259     assert(!State->Stack.empty() &&
260            "Cannot get level without any iteration state");
261     return State->Stack.size() - 1;
262   }
263 
264   void no_push() { State->HasNoPushRequest = true; }
265 };
266 
267 /// The virtual file system interface.
268 class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> {
269 public:
270   virtual ~FileSystem();
271 
272   /// Get the status of the entry at \p Path, if one exists.
273   virtual llvm::ErrorOr<Status> status(const Twine &Path) = 0;
274 
275   /// Get a \p File object for the file at \p Path, if one exists.
276   virtual llvm::ErrorOr<std::unique_ptr<File>>
277   openFileForRead(const Twine &Path) = 0;
278 
279   /// This is a convenience method that opens a file, gets its content and then
280   /// closes the file.
281   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
282   getBufferForFile(const Twine &Name, int64_t FileSize = -1,
283                    bool RequiresNullTerminator = true, bool IsVolatile = false);
284 
285   /// Get a directory_iterator for \p Dir.
286   /// \note The 'end' iterator is directory_iterator().
287   virtual directory_iterator dir_begin(const Twine &Dir,
288                                        std::error_code &EC) = 0;
289 
290   /// Set the working directory. This will affect all following operations on
291   /// this file system and may propagate down for nested file systems.
292   virtual std::error_code setCurrentWorkingDirectory(const Twine &Path) = 0;
293 
294   /// Get the working directory of this file system.
295   virtual llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const = 0;
296 
297   /// Gets real path of \p Path e.g. collapse all . and .. patterns, resolve
298   /// symlinks. For real file system, this uses `llvm::sys::fs::real_path`.
299   /// This returns errc::operation_not_permitted if not implemented by subclass.
300   virtual std::error_code getRealPath(const Twine &Path,
301                                       SmallVectorImpl<char> &Output) const;
302 
303   /// Check whether a file exists. Provided for convenience.
304   bool exists(const Twine &Path);
305 
306   /// Is the file mounted on a local filesystem?
307   virtual std::error_code isLocal(const Twine &Path, bool &Result);
308 
309   /// Make \a Path an absolute path.
310   ///
311   /// Makes \a Path absolute using the current directory if it is not already.
312   /// An empty \a Path will result in the current directory.
313   ///
314   /// /absolute/path   => /absolute/path
315   /// relative/../path => <current-directory>/relative/../path
316   ///
317   /// \param Path A path that is modified to be an absolute path.
318   /// \returns success if \a path has been made absolute, otherwise a
319   ///          platform-specific error_code.
320   virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
321 
322   enum class PrintType { Summary, Contents, RecursiveContents };
323   void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
324              unsigned IndentLevel = 0) const {
325     printImpl(OS, Type, IndentLevel);
326   }
327 
328 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
329   LLVM_DUMP_METHOD void dump() const;
330 #endif
331 
332 protected:
333   virtual void printImpl(raw_ostream &OS, PrintType Type,
334                          unsigned IndentLevel) const {
335     printIndent(OS, IndentLevel);
336     OS << "FileSystem\n";
337   }
338 
339   void printIndent(raw_ostream &OS, unsigned IndentLevel) const {
340     for (unsigned i = 0; i < IndentLevel; ++i)
341       OS << "  ";
342   }
343 };
344 
/// Gets a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
347 /// The working directory is linked to the process's working directory.
348 /// (This is usually thread-hostile).
349 IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
350 
/// Create a \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
353 /// It has its own working directory, independent of (but initially equal to)
354 /// that of the process.
355 std::unique_ptr<FileSystem> createPhysicalFileSystem();
356 
357 /// A file system that allows overlaying one \p AbstractFileSystem on top
358 /// of another.
359 ///
360 /// Consists of a stack of >=1 \p FileSystem objects, which are treated as being
361 /// one merged file system. When there is a directory that exists in more than
362 /// one file system, the \p OverlayFileSystem contains a directory containing
363 /// the union of their contents.  The attributes (permissions, etc.) of the
364 /// top-most (most recently added) directory are used.  When there is a file
365 /// that exists in more than one file system, the file in the top-most file
366 /// system overrides the other(s).
367 class OverlayFileSystem : public FileSystem {
368   using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>;
369 
370   /// The stack of file systems, implemented as a list in order of
371   /// their addition.
372   FileSystemList FSList;
373 
374 public:
375   OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base);
376 
377   /// Pushes a file system on top of the stack.
378   void pushOverlay(IntrusiveRefCntPtr<FileSystem> FS);
379 
380   llvm::ErrorOr<Status> status(const Twine &Path) override;
381   llvm::ErrorOr<std::unique_ptr<File>>
382   openFileForRead(const Twine &Path) override;
383   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
384   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
385   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
386   std::error_code isLocal(const Twine &Path, bool &Result) override;
387   std::error_code getRealPath(const Twine &Path,
388                               SmallVectorImpl<char> &Output) const override;
389 
390   using iterator = FileSystemList::reverse_iterator;
391   using const_iterator = FileSystemList::const_reverse_iterator;
392   using reverse_iterator = FileSystemList::iterator;
393   using const_reverse_iterator = FileSystemList::const_iterator;
394   using range = iterator_range<iterator>;
395   using const_range = iterator_range<const_iterator>;
396 
397   /// Get an iterator pointing to the most recently added file system.
398   iterator overlays_begin() { return FSList.rbegin(); }
399   const_iterator overlays_begin() const { return FSList.rbegin(); }
400 
401   /// Get an iterator pointing one-past the least recently added file system.
402   iterator overlays_end() { return FSList.rend(); }
403   const_iterator overlays_end() const { return FSList.rend(); }
404 
405   /// Get an iterator pointing to the least recently added file system.
406   reverse_iterator overlays_rbegin() { return FSList.begin(); }
407   const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
408 
409   /// Get an iterator pointing one-past the most recently added file system.
410   reverse_iterator overlays_rend() { return FSList.end(); }
411   const_reverse_iterator overlays_rend() const { return FSList.end(); }
412 
413   range overlays_range() { return llvm::reverse(FSList); }
414   const_range overlays_range() const { return llvm::reverse(FSList); }
415 
416 protected:
417   void printImpl(raw_ostream &OS, PrintType Type,
418                  unsigned IndentLevel) const override;
419 };
420 
421 /// By default, this delegates all calls to the underlying file system. This
422 /// is useful when derived file systems want to override some calls and still
423 /// proxy other calls.
424 class ProxyFileSystem : public FileSystem {
425 public:
426   explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS)
427       : FS(std::move(FS)) {}
428 
429   llvm::ErrorOr<Status> status(const Twine &Path) override {
430     return FS->status(Path);
431   }
432   llvm::ErrorOr<std::unique_ptr<File>>
433   openFileForRead(const Twine &Path) override {
434     return FS->openFileForRead(Path);
435   }
436   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override {
437     return FS->dir_begin(Dir, EC);
438   }
439   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
440     return FS->getCurrentWorkingDirectory();
441   }
442   std::error_code setCurrentWorkingDirectory(const Twine &Path) override {
443     return FS->setCurrentWorkingDirectory(Path);
444   }
445   std::error_code getRealPath(const Twine &Path,
446                               SmallVectorImpl<char> &Output) const override {
447     return FS->getRealPath(Path, Output);
448   }
449   std::error_code isLocal(const Twine &Path, bool &Result) override {
450     return FS->isLocal(Path, Result);
451   }
452 
453 protected:
454   FileSystem &getUnderlyingFS() { return *FS; }
455 
456 private:
457   IntrusiveRefCntPtr<FileSystem> FS;
458 
459   virtual void anchor();
460 };
461 
462 namespace detail {
463 
464 class InMemoryDirectory;
465 class InMemoryNode;
466 
467 struct NewInMemoryNodeInfo {
468   llvm::sys::fs::UniqueID DirUID;
469   StringRef Path;
470   StringRef Name;
471   time_t ModificationTime;
472   std::unique_ptr<llvm::MemoryBuffer> Buffer;
473   uint32_t User;
474   uint32_t Group;
475   llvm::sys::fs::file_type Type;
476   llvm::sys::fs::perms Perms;
477 
478   Status makeStatus() const;
479 };
480 
481 class NamedNodeOrError {
482   ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
483       Value;
484 
485 public:
486   NamedNodeOrError(llvm::SmallString<128> Name,
487                    const detail::InMemoryNode *Node)
488       : Value(std::make_pair(Name, Node)) {}
489   NamedNodeOrError(std::error_code EC) : Value(EC) {}
490   NamedNodeOrError(llvm::errc EC) : Value(EC) {}
491 
492   StringRef getName() const { return (*Value).first; }
493   explicit operator bool() const { return static_cast<bool>(Value); }
494   operator std::error_code() const { return Value.getError(); }
495   std::error_code getError() const { return Value.getError(); }
496   const detail::InMemoryNode *operator*() const { return (*Value).second; }
497 };
498 
499 } // namespace detail
500 
501 /// An in-memory file system.
502 class InMemoryFileSystem : public FileSystem {
503   std::unique_ptr<detail::InMemoryDirectory> Root;
504   std::string WorkingDirectory;
505   bool UseNormalizedPaths = true;
506 
507   using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>(
508       detail::NewInMemoryNodeInfo)>;
509 
510   /// Create node with \p MakeNode and add it into this filesystem at \p Path.
511   bool addFile(const Twine &Path, time_t ModificationTime,
512                std::unique_ptr<llvm::MemoryBuffer> Buffer,
513                Optional<uint32_t> User, Optional<uint32_t> Group,
514                Optional<llvm::sys::fs::file_type> Type,
515                Optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
516 
517   /// Looks up the in-memory node for the path \param P.
518   /// If \param FollowFinalSymlink is true, the returned node is guaranteed to
519   /// not be a symlink and its path may differ from \param P.
520   detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
521                                       size_t SymlinkDepth = 0) const;
522 
523   class DirIterator;
524 
525 public:
526   explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
527   ~InMemoryFileSystem() override;
528 
529   /// Add a file containing a buffer or a directory to the VFS with a
530   /// path. The VFS owns the buffer.  If present, User, Group, Type
531   /// and Perms apply to the newly-created file or directory.
532   /// \return true if the file or directory was successfully added,
533   /// false if the file or directory already exists in the file system with
534   /// different contents.
535   bool addFile(const Twine &Path, time_t ModificationTime,
536                std::unique_ptr<llvm::MemoryBuffer> Buffer,
537                Optional<uint32_t> User = None, Optional<uint32_t> Group = None,
538                Optional<llvm::sys::fs::file_type> Type = None,
539                Optional<llvm::sys::fs::perms> Perms = None);
540 
541   /// Add a hard link to a file.
542   ///
543   /// Here hard links are not intended to be fully equivalent to the classical
544   /// filesystem. Both the hard link and the file share the same buffer and
545   /// status (and thus have the same UniqueID). Because of this there is no way
546   /// to distinguish between the link and the file after the link has been
547   /// added.
548   ///
549   /// The \param Target path must be an existing file or a hardlink. The
550   /// \param NewLink file must not have been added before. The \param Target
551   /// path must not be a directory. The \param NewLink node is added as a hard
552   /// link which points to the resolved file of \param Target node.
553   /// \return true if the above condition is satisfied and hardlink was
554   /// successfully created, false otherwise.
555   bool addHardLink(const Twine &NewLink, const Twine &Target);
556 
557   /// Arbitrary max depth to search through symlinks. We can get into problems
558   /// if a link links to a link that links back to the link, for example.
559   static constexpr size_t MaxSymlinkDepth = 16;
560 
561   /// Add a symbolic link. Unlike a HardLink, because \param Target doesn't need
562   /// to refer to a file (or refer to anything, as it happens). Also, an
563   /// in-memory directory for \param Target isn't automatically created.
564   bool addSymbolicLink(const Twine &NewLink, const Twine &Target,
565                        time_t ModificationTime, Optional<uint32_t> User = None,
566                        Optional<uint32_t> Group = None,
567                        Optional<llvm::sys::fs::perms> Perms = None);
568 
569   /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
570   /// If present, User, Group, Type and Perms apply to the newly-created file
571   /// or directory.
572   /// \return true if the file or directory was successfully added,
573   /// false if the file or directory already exists in the file system with
574   /// different contents.
575   bool addFileNoOwn(const Twine &Path, time_t ModificationTime,
576                     const llvm::MemoryBufferRef &Buffer,
577                     Optional<uint32_t> User = None,
578                     Optional<uint32_t> Group = None,
579                     Optional<llvm::sys::fs::file_type> Type = None,
580                     Optional<llvm::sys::fs::perms> Perms = None);
581 
582   std::string toString() const;
583 
584   /// Return true if this file system normalizes . and .. in paths.
585   bool useNormalizedPaths() const { return UseNormalizedPaths; }
586 
587   llvm::ErrorOr<Status> status(const Twine &Path) override;
588   llvm::ErrorOr<std::unique_ptr<File>>
589   openFileForRead(const Twine &Path) override;
590   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
591 
592   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
593     return WorkingDirectory;
594   }
595   /// Canonicalizes \p Path by combining with the current working
596   /// directory and normalizing the path (e.g. remove dots). If the current
597   /// working directory is not set, this returns errc::operation_not_permitted.
598   ///
599   /// This doesn't resolve symlinks as they are not supported in in-memory file
600   /// system.
601   std::error_code getRealPath(const Twine &Path,
602                               SmallVectorImpl<char> &Output) const override;
603   std::error_code isLocal(const Twine &Path, bool &Result) override;
604   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
605 
606 protected:
607   void printImpl(raw_ostream &OS, PrintType Type,
608                  unsigned IndentLevel) const override;
609 };
610 
611 /// Get a globally unique ID for a virtual file or directory.
612 llvm::sys::fs::UniqueID getNextVirtualUniqueID();
613 
614 /// Gets a \p FileSystem for a virtual file system described in YAML
615 /// format.
616 std::unique_ptr<FileSystem>
617 getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer,
618                llvm::SourceMgr::DiagHandlerTy DiagHandler,
619                StringRef YAMLFilePath, void *DiagContext = nullptr,
620                IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
621 
/// A single virtual-path to real-path mapping, optionally flagged as a
/// directory.
struct YAMLVFSEntry {
  /// The virtual path.
  std::string VPath;
  /// The real path.
  std::string RPath;
  /// Whether the entry denotes a directory; false unless specified.
  bool IsDirectory = false;

  /// Builds an entry, perfectly forwarding both path arguments into the
  /// corresponding std::string members.
  template <typename VirtualPathT, typename RealPathT>
  YAMLVFSEntry(VirtualPathT &&VPath, RealPathT &&RPath,
               bool IsDirectory = false)
      : VPath(std::forward<VirtualPathT>(VPath)),
        RPath(std::forward<RealPathT>(RPath)), IsDirectory(IsDirectory) {}
};
631 
632 class RedirectingFSDirIterImpl;
633 class RedirectingFileSystemParser;
634 
635 /// A virtual file system parsed from a YAML file.
636 ///
637 /// Currently, this class allows creating virtual files and directories. Virtual
638 /// files map to existing external files in \c ExternalFS, and virtual
639 /// directories may either map to existing directories in \c ExternalFS or list
640 /// their contents in the form of other virtual directories and/or files.
641 ///
642 /// The basic structure of the parsed file is:
643 /// \verbatim
644 /// {
645 ///   'version': <version number>,
646 ///   <optional configuration>
647 ///   'roots': [
648 ///              <directory entries>
649 ///            ]
650 /// }
651 /// \endverbatim
652 ///
653 /// The roots may be absolute or relative. If relative they will be made
654 /// absolute against the current working directory.
655 ///
656 /// All configuration options are optional.
657 ///   'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
658 ///   'use-external-names': <boolean, default=true>
659 ///   'overlay-relative': <boolean, default=false>
660 ///   'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
661 ///                   instead>
662 ///   'redirecting-with': <string, one of 'fallthrough', 'fallback', or
663 ///                        'redirect-only', default='fallthrough'>
664 ///
665 /// Virtual directories that list their contents are represented as
666 /// \verbatim
667 /// {
668 ///   'type': 'directory',
669 ///   'name': <string>,
670 ///   'contents': [ <file or directory entries> ]
671 /// }
672 /// \endverbatim
673 ///
674 /// The default attributes for such virtual directories are:
675 /// \verbatim
676 /// MTime = now() when created
677 /// Perms = 0777
678 /// User = Group = 0
679 /// Size = 0
680 /// UniqueID = unspecified unique value
681 /// \endverbatim
682 ///
683 /// When a path prefix matches such a directory, the next component in the path
684 /// is matched against the entries in the 'contents' array.
685 ///
686 /// Re-mapped directories, on the other hand, are represented as
/// \verbatim
688 /// {
689 ///   'type': 'directory-remap',
690 ///   'name': <string>,
691 ///   'use-external-name': <boolean>, # Optional
692 ///   'external-contents': <path to external directory>
693 /// }
694 /// \endverbatim
695 ///
696 /// and inherit their attributes from the external directory. When a path
697 /// prefix matches such an entry, the unmatched components are appended to the
698 /// 'external-contents' path, and the resulting path is looked up in the
699 /// external file system instead.
700 ///
701 /// Re-mapped files are represented as
702 /// \verbatim
703 /// {
704 ///   'type': 'file',
705 ///   'name': <string>,
706 ///   'use-external-name': <boolean>, # Optional
707 ///   'external-contents': <path to external file>
708 /// }
709 /// \endverbatim
710 ///
711 /// Their attributes and file contents are determined by looking up the file at
712 /// their 'external-contents' path in the external file system.
713 ///
714 /// For 'file', 'directory' and 'directory-remap' entries the 'name' field may
715 /// contain multiple path components (e.g. /path/to/file). However, any
716 /// directory in such a path that contains more than one child must be uniquely
717 /// represented by a 'directory' entry.
718 ///
719 /// When the 'use-external-name' field is set, calls to \a vfs::File::status()
720 /// give the external (remapped) filesystem name instead of the name the file
721 /// was accessed by. This is an intentional leak through the \a
722 /// RedirectingFileSystem abstraction layer. It enables clients to discover
723 /// (and use) the external file location when communicating with users or tools
724 /// that don't use the same VFS overlay.
725 ///
726 /// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
727 /// "real" filesystems behave. Maybe there should be a separate channel for
728 /// this information.
729 class RedirectingFileSystem : public vfs::FileSystem {
730 public:
731   enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
732   enum NameKind { NK_NotSet, NK_External, NK_Virtual };
733 
734   /// The type of redirection to perform.
735   enum class RedirectKind {
736     /// Lookup the redirected path first (ie. the one specified in
737     /// 'external-contents') and if that fails "fallthrough" to a lookup of the
738     /// originally provided path.
739     Fallthrough,
740     /// Lookup the provided path first and if that fails, "fallback" to a
741     /// lookup of the redirected path.
742     Fallback,
743     /// Only lookup the redirected path, do not lookup the originally provided
744     /// path.
745     RedirectOnly
746   };
747 
748   /// A single file or directory in the VFS.
749   class Entry {
750     EntryKind Kind;
751     std::string Name;
752 
753   public:
754     Entry(EntryKind K, StringRef Name) : Kind(K), Name(Name) {}
755     virtual ~Entry() = default;
756 
757     StringRef getName() const { return Name; }
758     EntryKind getKind() const { return Kind; }
759   };
760 
761   /// A directory in the vfs with explicitly specified contents.
762   class DirectoryEntry : public Entry {
763     std::vector<std::unique_ptr<Entry>> Contents;
764     Status S;
765 
766   public:
767     /// Constructs a directory entry with explicitly specified contents.
768     DirectoryEntry(StringRef Name, std::vector<std::unique_ptr<Entry>> Contents,
769                    Status S)
770         : Entry(EK_Directory, Name), Contents(std::move(Contents)),
771           S(std::move(S)) {}
772 
773     /// Constructs an empty directory entry.
774     DirectoryEntry(StringRef Name, Status S)
775         : Entry(EK_Directory, Name), S(std::move(S)) {}
776 
777     Status getStatus() { return S; }
778 
779     void addContent(std::unique_ptr<Entry> Content) {
780       Contents.push_back(std::move(Content));
781     }
782 
783     Entry *getLastContent() const { return Contents.back().get(); }
784 
785     using iterator = decltype(Contents)::iterator;
786 
787     iterator contents_begin() { return Contents.begin(); }
788     iterator contents_end() { return Contents.end(); }
789 
790     static bool classof(const Entry *E) { return E->getKind() == EK_Directory; }
791   };
792 
793   /// A file or directory in the vfs that is mapped to a file or directory in
794   /// the external filesystem.
795   class RemapEntry : public Entry {
796     std::string ExternalContentsPath;
797     NameKind UseName;
798 
799   protected:
800     RemapEntry(EntryKind K, StringRef Name, StringRef ExternalContentsPath,
801                NameKind UseName)
802         : Entry(K, Name), ExternalContentsPath(ExternalContentsPath),
803           UseName(UseName) {}
804 
805   public:
806     StringRef getExternalContentsPath() const { return ExternalContentsPath; }
807 
808     /// Whether to use the external path as the name for this file or directory.
809     bool useExternalName(bool GlobalUseExternalName) const {
810       return UseName == NK_NotSet ? GlobalUseExternalName
811                                   : (UseName == NK_External);
812     }
813 
814     NameKind getUseName() const { return UseName; }
815 
816     static bool classof(const Entry *E) {
817       switch (E->getKind()) {
818       case EK_DirectoryRemap:
819         LLVM_FALLTHROUGH;
820       case EK_File:
821         return true;
822       case EK_Directory:
823         return false;
824       }
825       llvm_unreachable("invalid entry kind");
826     }
827   };
828 
829   /// A directory in the vfs that maps to a directory in the external file
830   /// system.
831   class DirectoryRemapEntry : public RemapEntry {
832   public:
833     DirectoryRemapEntry(StringRef Name, StringRef ExternalContentsPath,
834                         NameKind UseName)
835         : RemapEntry(EK_DirectoryRemap, Name, ExternalContentsPath, UseName) {}
836 
837     static bool classof(const Entry *E) {
838       return E->getKind() == EK_DirectoryRemap;
839     }
840   };
841 
842   /// A file in the vfs that maps to a file in the external file system.
843   class FileEntry : public RemapEntry {
844   public:
845     FileEntry(StringRef Name, StringRef ExternalContentsPath, NameKind UseName)
846         : RemapEntry(EK_File, Name, ExternalContentsPath, UseName) {}
847 
848     static bool classof(const Entry *E) { return E->getKind() == EK_File; }
849   };
850 
851   /// Represents the result of a path lookup into the RedirectingFileSystem.
852   struct LookupResult {
853     /// The entry the looked-up path corresponds to.
854     Entry *E;
855 
856   private:
857     /// When the found Entry is a DirectoryRemapEntry, stores the path in the
858     /// external file system that the looked-up path in the virtual file system
859     //  corresponds to.
860     Optional<std::string> ExternalRedirect;
861 
862   public:
863     LookupResult(Entry *E, sys::path::const_iterator Start,
864                  sys::path::const_iterator End);
865 
866     /// If the found Entry maps the the input path to a path in the external
867     /// file system (i.e. it is a FileEntry or DirectoryRemapEntry), returns
868     /// that path.
869     Optional<StringRef> getExternalRedirect() const {
870       if (isa<DirectoryRemapEntry>(E))
871         return StringRef(*ExternalRedirect);
872       if (auto *FE = dyn_cast<FileEntry>(E))
873         return FE->getExternalContentsPath();
874       return None;
875     }
876   };
877 
878 private:
879   friend class RedirectingFSDirIterImpl;
880   friend class RedirectingFileSystemParser;
881 
882   /// Canonicalize path by removing ".", "..", "./", components. This is
883   /// a VFS request, do not bother about symlinks in the path components
884   /// but canonicalize in order to perform the correct entry search.
885   std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
886 
887   /// Get the File status, or error, from the underlying external file system.
888   /// This returns the status with the originally requested name, while looking
889   /// up the entry using the canonical path.
890   ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
891                                     const Twine &OriginalPath) const;
892 
893   // In a RedirectingFileSystem, keys can be specified in Posix or Windows
894   // style (or even a mixture of both), so this comparison helper allows
895   // slashes (representing a root) to match backslashes (and vice versa).  Note
896   // that, other than the root, path components should not contain slashes or
897   // backslashes.
898   bool pathComponentMatches(llvm::StringRef lhs, llvm::StringRef rhs) const {
899     if ((CaseSensitive ? lhs.equals(rhs) : lhs.equals_insensitive(rhs)))
900       return true;
901     return (lhs == "/" && rhs == "\\") || (lhs == "\\" && rhs == "/");
902   }
903 
904   /// The root(s) of the virtual file system.
905   std::vector<std::unique_ptr<Entry>> Roots;
906 
907   /// The current working directory of the file system.
908   std::string WorkingDirectory;
909 
910   /// The file system to use for external references.
911   IntrusiveRefCntPtr<FileSystem> ExternalFS;
912 
913   /// If IsRelativeOverlay is set, this represents the directory
914   /// path that should be prefixed to each 'external-contents' entry
915   /// when reading from YAML files.
916   std::string ExternalContentsPrefixDir;
917 
918   /// @name Configuration
919   /// @{
920 
921   /// Whether to perform case-sensitive comparisons.
922   ///
923   /// Currently, case-insensitive matching only works correctly with ASCII.
924   bool CaseSensitive = is_style_posix(sys::path::Style::native);
925 
926   /// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
927   /// be prefixed in every 'external-contents' when reading from YAML files.
928   bool IsRelativeOverlay = false;
929 
930   /// Whether to use to use the value of 'external-contents' for the
931   /// names of files.  This global value is overridable on a per-file basis.
932   bool UseExternalNames = true;
933 
934   /// Determines the lookups to perform, as well as their order. See
935   /// \c RedirectKind for details.
936   RedirectKind Redirection = RedirectKind::Fallthrough;
937   /// @}
938 
939   RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
940 
941   /// Looks up the path <tt>[Start, End)</tt> in \p From, possibly recursing
942   /// into the contents of \p From if it is a directory. Returns a LookupResult
943   /// giving the matched entry and, if that entry is a FileEntry or
944   /// DirectoryRemapEntry, the path it redirects to in the external file system.
945   ErrorOr<LookupResult> lookupPathImpl(llvm::sys::path::const_iterator Start,
946                                        llvm::sys::path::const_iterator End,
947                                        Entry *From) const;
948 
949   /// Get the status for a path with the provided \c LookupResult.
950   ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
951                          const LookupResult &Result);
952 
953 public:
954   /// Looks up \p Path in \c Roots and returns a LookupResult giving the
955   /// matched entry and, if the entry was a FileEntry or DirectoryRemapEntry,
956   /// the path it redirects to in the external file system.
957   ErrorOr<LookupResult> lookupPath(StringRef Path) const;
958 
959   /// Parses \p Buffer, which is expected to be in YAML format and
960   /// returns a virtual file system representing its contents.
961   static std::unique_ptr<RedirectingFileSystem>
962   create(std::unique_ptr<MemoryBuffer> Buffer,
963          SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
964          void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
965 
966   /// Redirect each of the remapped files from first to second.
967   static std::unique_ptr<RedirectingFileSystem>
968   create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles,
969          bool UseExternalNames, FileSystem &ExternalFS);
970 
971   ErrorOr<Status> status(const Twine &Path) override;
972   ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
973 
974   std::error_code getRealPath(const Twine &Path,
975                               SmallVectorImpl<char> &Output) const override;
976 
977   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override;
978 
979   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
980 
981   std::error_code isLocal(const Twine &Path, bool &Result) override;
982 
983   std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const override;
984 
985   directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
986 
987   void setExternalContentsPrefixDir(StringRef PrefixDir);
988 
989   StringRef getExternalContentsPrefixDir() const;
990 
991   /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly
992   /// otherwise. Will removed in the future, use \c setRedirection instead.
993   void setFallthrough(bool Fallthrough);
994 
995   void setRedirection(RedirectingFileSystem::RedirectKind Kind);
996 
997   std::vector<llvm::StringRef> getRoots() const;
998 
999   void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const;
1000 
1001 protected:
1002   void printImpl(raw_ostream &OS, PrintType Type,
1003                  unsigned IndentLevel) const override;
1004 };
1005 
1006 /// Collect all pairs of <virtual path, real path> entries from the
1007 /// \p YAMLFilePath. This is used by the module dependency collector to forward
1008 /// the entries into the reproducer output VFS YAML file.
1009 void collectVFSFromYAML(
1010     std::unique_ptr<llvm::MemoryBuffer> Buffer,
1011     llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
1012     SmallVectorImpl<YAMLVFSEntry> &CollectedEntries,
1013     void *DiagContext = nullptr,
1014     IntrusiveRefCntPtr<FileSystem> ExternalFS = getRealFileSystem());
1015 
1016 class YAMLVFSWriter {
1017   std::vector<YAMLVFSEntry> Mappings;
1018   Optional<bool> IsCaseSensitive;
1019   Optional<bool> IsOverlayRelative;
1020   Optional<bool> UseExternalNames;
1021   std::string OverlayDir;
1022 
1023   void addEntry(StringRef VirtualPath, StringRef RealPath, bool IsDirectory);
1024 
1025 public:
1026   YAMLVFSWriter() = default;
1027 
1028   void addFileMapping(StringRef VirtualPath, StringRef RealPath);
1029   void addDirectoryMapping(StringRef VirtualPath, StringRef RealPath);
1030 
1031   void setCaseSensitivity(bool CaseSensitive) {
1032     IsCaseSensitive = CaseSensitive;
1033   }
1034 
1035   void setUseExternalNames(bool UseExtNames) { UseExternalNames = UseExtNames; }
1036 
1037   void setOverlayDir(StringRef OverlayDirectory) {
1038     IsOverlayRelative = true;
1039     OverlayDir.assign(OverlayDirectory.str());
1040   }
1041 
1042   const std::vector<YAMLVFSEntry> &getMappings() const { return Mappings; }
1043 
1044   void write(llvm::raw_ostream &OS);
1045 };
1046 
1047 } // namespace vfs
1048 } // namespace llvm
1049 
1050 #endif // LLVM_SUPPORT_VIRTUALFILESYSTEM_H
1051