1 //===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Support/FileCollector.h"
10 #include "llvm/ADT/SmallString.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/Support/FileSystem.h"
13 #include "llvm/Support/Path.h"
14 #include "llvm/Support/Process.h"
15 
16 using namespace llvm;
17 
// Constructor and destructor of FileCollectorBase. Both are defined here,
// out of line, as explicitly defaulted members.
18 FileCollectorBase::FileCollectorBase() = default;
19 FileCollectorBase::~FileCollectorBase() = default;
20 
addFile(const Twine & File)21 void FileCollectorBase::addFile(const Twine &File) {
22   std::lock_guard<std::mutex> lock(Mutex);
23   std::string FileStr = File.str();
24   if (markAsSeen(FileStr))
25     addFileImpl(FileStr);
26 }
27 
addDirectory(const Twine & Dir)28 void FileCollectorBase::addDirectory(const Twine &Dir) {
29   assert(sys::fs::is_directory(Dir));
30   std::error_code EC;
31   addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
32 }
33 
isCaseSensitivePath(StringRef Path)34 static bool isCaseSensitivePath(StringRef Path) {
35   SmallString<256> TmpDest = Path, UpperDest, RealDest;
36 
37   // Remove component traversals, links, etc.
38   if (sys::fs::real_path(Path, TmpDest))
39     return true; // Current default value in vfs.yaml
40   Path = TmpDest;
41 
42   // Change path to all upper case and ask for its real path, if the latter
43   // exists and is equal to path, it's not case sensitive. Default to case
44   // sensitive in the absence of real_path, since this is the YAMLVFSWriter
45   // default.
46   UpperDest = Path.upper();
47   if (!sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
48     return false;
49   return true;
50 }
51 
FileCollector(std::string Root,std::string OverlayRoot)52 FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
53     : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) {
54 }
55 
updateWithRealPath(SmallVectorImpl<char> & Path)56 void FileCollector::PathCanonicalizer::updateWithRealPath(
57     SmallVectorImpl<char> &Path) {
58   StringRef SrcPath(Path.begin(), Path.size());
59   StringRef Filename = sys::path::filename(SrcPath);
60   StringRef Directory = sys::path::parent_path(SrcPath);
61 
62   // Use real_path to fix any symbolic link component present in the directory
63   // part of the path, caching the search because computing the real path is
64   // expensive.
65   SmallString<256> RealPath;
66   auto DirWithSymlink = CachedDirs.find(Directory);
67   if (DirWithSymlink == CachedDirs.end()) {
68     // FIXME: Should this be a call to FileSystem::getRealpath(), in some
69     // cases? What if there is nothing on disk?
70     if (sys::fs::real_path(Directory, RealPath))
71       return;
72     CachedDirs[Directory] = std::string(RealPath.str());
73   } else {
74     RealPath = DirWithSymlink->second;
75   }
76 
77   // Finish recreating the path by appending the original filename, since we
78   // don't need to resolve symlinks in the filename.
79   //
80   // FIXME: If we can cope with this, maybe we can cope without calling
81   // getRealPath() at all when there's no ".." component.
82   sys::path::append(RealPath, Filename);
83 
84   // Swap to create the output.
85   Path.swap(RealPath);
86 }
87 
88 /// Make Path absolute.
makeAbsolute(SmallVectorImpl<char> & Path)89 static void makeAbsolute(SmallVectorImpl<char> &Path) {
90   // We need an absolute src path to append to the root.
91   sys::fs::make_absolute(Path);
92 
93   // Canonicalize src to a native path to avoid mixed separator styles.
94   sys::path::native(Path);
95 
96   // Remove redundant leading "./" pieces and consecutive separators.
97   Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
98                                StringRef(Path.begin(), Path.size()))
99                                .begin());
100 }
101 
102 FileCollector::PathCanonicalizer::PathStorage
canonicalize(StringRef SrcPath)103 FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
104   PathStorage Paths;
105   Paths.VirtualPath = SrcPath;
106   makeAbsolute(Paths.VirtualPath);
107 
108   // If a ".." component is present after a symlink component, remove_dots may
109   // lead to the wrong real destination path. Let the source be canonicalized
110   // like that but make sure we always use the real path for the destination.
111   Paths.CopyFrom = Paths.VirtualPath;
112   updateWithRealPath(Paths.CopyFrom);
113 
114   // Canonicalize the virtual path by removing "..", "." components.
115   sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
116 
117   return Paths;
118 }
119 
addFileImpl(StringRef SrcPath)120 void FileCollector::addFileImpl(StringRef SrcPath) {
121   PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
122 
123   SmallString<256> DstPath = StringRef(Root);
124   sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
125 
126   // Always map a canonical src path to its real path into the YAML, by doing
127   // this we map different virtual src paths to the same entry in the VFS
128   // overlay, which is a way to emulate symlink inside the VFS; this is also
129   // needed for correctness, not doing that can lead to module redefinition
130   // errors.
131   addFileToMapping(Paths.VirtualPath, DstPath);
132 }
133 
134 llvm::vfs::directory_iterator
addDirectoryImpl(const llvm::Twine & Dir,IntrusiveRefCntPtr<vfs::FileSystem> FS,std::error_code & EC)135 FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
136                                 IntrusiveRefCntPtr<vfs::FileSystem> FS,
137                                 std::error_code &EC) {
138   auto It = FS->dir_begin(Dir, EC);
139   if (EC)
140     return It;
141   addFile(Dir);
142   for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
143     if (It->type() == sys::fs::file_type::regular_file ||
144         It->type() == sys::fs::file_type::directory_file ||
145         It->type() == sys::fs::file_type::symlink_file) {
146       addFile(It->path());
147     }
148   }
149   if (EC)
150     return It;
151   // Return a new iterator.
152   return FS->dir_begin(Dir, EC);
153 }
154 
155 /// Set the access and modification time for the given file from the given
156 /// status object.
157 static std::error_code
copyAccessAndModificationTime(StringRef Filename,const sys::fs::file_status & Stat)158 copyAccessAndModificationTime(StringRef Filename,
159                               const sys::fs::file_status &Stat) {
160   int FD;
161 
162   if (auto EC =
163           sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
164     return EC;
165 
166   if (auto EC = sys::fs::setLastAccessAndModificationTime(
167           FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
168     return EC;
169 
170   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
171     return EC;
172 
173   return {};
174 }
175 
copyFiles(bool StopOnError)176 std::error_code FileCollector::copyFiles(bool StopOnError) {
177   auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
178   if (Err) {
179     return Err;
180   }
181 
182   std::lock_guard<std::mutex> lock(Mutex);
183 
184   for (auto &entry : VFSWriter.getMappings()) {
185     // Get the status of the original file/directory.
186     sys::fs::file_status Stat;
187     if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
188       if (StopOnError)
189         return EC;
190       continue;
191     }
192 
193     // Continue if the file doesn't exist.
194     if (Stat.type() == sys::fs::file_type::file_not_found)
195       continue;
196 
197     // Create directory tree.
198     if (std::error_code EC =
199             sys::fs::create_directories(sys::path::parent_path(entry.RPath),
200                                         /*IgnoreExisting=*/true)) {
201       if (StopOnError)
202         return EC;
203     }
204 
205     if (Stat.type() == sys::fs::file_type::directory_file) {
206       // Construct a directory when it's just a directory entry.
207       if (std::error_code EC =
208               sys::fs::create_directories(entry.RPath,
209                                           /*IgnoreExisting=*/true)) {
210         if (StopOnError)
211           return EC;
212       }
213       continue;
214     }
215 
216     // Copy file over.
217     if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
218       if (StopOnError)
219         return EC;
220     }
221 
222     // Copy over permissions.
223     if (auto perms = sys::fs::getPermissions(entry.VPath)) {
224       if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
225         if (StopOnError)
226           return EC;
227       }
228     }
229 
230     // Copy over modification time.
231     copyAccessAndModificationTime(entry.RPath, Stat);
232   }
233   return {};
234 }
235 
writeMapping(StringRef MappingFile)236 std::error_code FileCollector::writeMapping(StringRef MappingFile) {
237   std::lock_guard<std::mutex> lock(Mutex);
238 
239   VFSWriter.setOverlayDir(OverlayRoot);
240   VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
241   VFSWriter.setUseExternalNames(false);
242 
243   std::error_code EC;
244   raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
245   if (EC)
246     return EC;
247 
248   VFSWriter.write(os);
249 
250   return {};
251 }
252 
253 namespace llvm {
254 
255 class FileCollectorFileSystem : public vfs::FileSystem {
256 public:
FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,std::shared_ptr<FileCollector> Collector)257   explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
258                                    std::shared_ptr<FileCollector> Collector)
259       : FS(std::move(FS)), Collector(std::move(Collector)) {}
260 
status(const Twine & Path)261   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
262     auto Result = FS->status(Path);
263     if (Result && Result->exists())
264       Collector->addFile(Path);
265     return Result;
266   }
267 
268   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)269   openFileForRead(const Twine &Path) override {
270     auto Result = FS->openFileForRead(Path);
271     if (Result && *Result)
272       Collector->addFile(Path);
273     return Result;
274   }
275 
dir_begin(const llvm::Twine & Dir,std::error_code & EC)276   llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
277                                           std::error_code &EC) override {
278     return Collector->addDirectoryImpl(Dir, FS, EC);
279   }
280 
getRealPath(const Twine & Path,SmallVectorImpl<char> & Output) const281   std::error_code getRealPath(const Twine &Path,
282                               SmallVectorImpl<char> &Output) const override {
283     auto EC = FS->getRealPath(Path, Output);
284     if (!EC) {
285       Collector->addFile(Path);
286       if (Output.size() > 0)
287         Collector->addFile(Output);
288     }
289     return EC;
290   }
291 
isLocal(const Twine & Path,bool & Result)292   std::error_code isLocal(const Twine &Path, bool &Result) override {
293     return FS->isLocal(Path, Result);
294   }
295 
getCurrentWorkingDirectory() const296   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
297     return FS->getCurrentWorkingDirectory();
298   }
299 
setCurrentWorkingDirectory(const llvm::Twine & Path)300   std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
301     return FS->setCurrentWorkingDirectory(Path);
302   }
303 
304 private:
305   IntrusiveRefCntPtr<vfs::FileSystem> FS;
306   std::shared_ptr<FileCollector> Collector;
307 };
308 
309 } // namespace llvm
310 
311 IntrusiveRefCntPtr<vfs::FileSystem>
createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,std::shared_ptr<FileCollector> Collector)312 FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
313                                   std::shared_ptr<FileCollector> Collector) {
314   return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
315 }
316