1 //===-- FileCollector.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_SUPPORT_FILECOLLECTOR_H
10 #define LLVM_SUPPORT_FILECOLLECTOR_H
11 
12 #include "llvm/ADT/StringMap.h"
13 #include "llvm/ADT/StringSet.h"
14 #include "llvm/Support/VirtualFileSystem.h"
15 #include <mutex>
16 #include <string>
17 
18 namespace llvm {
19 class FileCollectorFileSystem;
20 class Twine;
21 
22 class FileCollectorBase {
23 public:
24   FileCollectorBase();
25   virtual ~FileCollectorBase();
26 
27   void addFile(const Twine &file);
28   void addDirectory(const Twine &Dir);
29 
30 protected:
31   bool markAsSeen(StringRef Path) {
32     if (Path.empty())
33       return false;
34     return Seen.insert(Path).second;
35   }
36 
37   virtual void addFileImpl(StringRef SrcPath) = 0;
38 
39   virtual llvm::vfs::directory_iterator
40   addDirectoryImpl(const llvm::Twine &Dir,
41                    IntrusiveRefCntPtr<vfs::FileSystem> FS,
42                    std::error_code &EC) = 0;
43 
44   /// Synchronizes access to internal data structures.
45   std::mutex Mutex;
46 
47   /// Tracks already seen files so they can be skipped.
48   StringSet<> Seen;
49 };
50 
51 /// Captures file system interaction and generates data to be later replayed
52 /// with the RedirectingFileSystem.
53 ///
54 /// For any file that gets accessed we eventually create:
55 /// - a copy of the file inside Root
56 /// - a record in RedirectingFileSystem mapping that maps:
57 ///   current real path -> path to the copy in Root
58 ///
/// The intent is that later when the mapping is used by RedirectingFileSystem
60 /// it simulates the state of FS that we collected.
61 ///
62 /// We generate file copies and mapping lazily - see writeMapping and copyFiles.
63 /// We don't try to capture the state of the file at the exact time when it's
64 /// accessed. Files might get changed, deleted ... we record only the "final"
65 /// state.
66 ///
67 /// In order to preserve the relative topology of files we use their real paths
68 /// as relative paths inside of the Root.
69 class FileCollector : public FileCollectorBase {
70 public:
71   /// Helper utility that encapsulates the logic for canonicalizing a virtual
72   /// path and a path to copy from.
73   class PathCanonicalizer {
74   public:
75     struct PathStorage {
76       SmallString<256> CopyFrom;
77       SmallString<256> VirtualPath;
78     };
79 
80     /// Canonicalize a pair of virtual and real paths.
81     PathStorage canonicalize(StringRef SrcPath);
82 
83   private:
84     /// Replace with a (mostly) real path, or don't modify. Resolves symlinks
85     /// in the directory, using \a CachedDirs to avoid redundant lookups, but
86     /// leaves the filename as a possible symlink.
87     void updateWithRealPath(SmallVectorImpl<char> &Path);
88 
89     StringMap<std::string> CachedDirs;
90   };
91 
92   /// \p Root is the directory where collected files are will be stored.
93   /// \p OverlayRoot is VFS mapping root.
94   /// \p Root directory gets created in copyFiles unless it already exists.
95   FileCollector(std::string Root, std::string OverlayRoot);
96 
97   /// Write the yaml mapping (for the VFS) to the given file.
98   std::error_code writeMapping(StringRef MappingFile);
99 
100   /// Copy the files into the root directory.
101   ///
102   /// When StopOnError is true (the default) we abort as soon as one file
103   /// cannot be copied. This is relatively common, for example when a file was
104   /// removed after it was added to the mapping.
105   std::error_code copyFiles(bool StopOnError = true);
106 
107   /// Create a VFS that uses \p Collector to collect files accessed via \p
108   /// BaseFS.
109   static IntrusiveRefCntPtr<vfs::FileSystem>
110   createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
111                      std::shared_ptr<FileCollector> Collector);
112 
113 private:
114   friend FileCollectorFileSystem;
115 
116   void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
117     if (sys::fs::is_directory(VirtualPath))
118       VFSWriter.addDirectoryMapping(VirtualPath, RealPath);
119     else
120       VFSWriter.addFileMapping(VirtualPath, RealPath);
121   }
122 
123 protected:
124   void addFileImpl(StringRef SrcPath) override;
125 
126   llvm::vfs::directory_iterator
127   addDirectoryImpl(const llvm::Twine &Dir,
128                    IntrusiveRefCntPtr<vfs::FileSystem> FS,
129                    std::error_code &EC) override;
130 
131   /// The directory where collected files are copied to in copyFiles().
132   const std::string Root;
133 
134   /// The root directory where the VFS overlay lives.
135   const std::string OverlayRoot;
136 
137   /// The yaml mapping writer.
138   vfs::YAMLVFSWriter VFSWriter;
139 
140   /// Helper utility for canonicalizing paths.
141   PathCanonicalizer Canonicalizer;
142 };
143 
144 } // end namespace llvm
145 
146 #endif // LLVM_SUPPORT_FILECOLLECTOR_H
147