1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "llvm/Support/MemoryBuffer.h"
11 #include "llvm/Support/SmallVectorMemoryBuffer.h"
12 #include "llvm/Support/Threading.h"
13 #include <optional>
14 
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18 
19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21   // Load the file and its content from the file system.
22   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23   if (!MaybeFile)
24     return MaybeFile.getError();
25   auto File = std::move(*MaybeFile);
26 
27   auto MaybeStat = File->status();
28   if (!MaybeStat)
29     return MaybeStat.getError();
30   auto Stat = std::move(*MaybeStat);
31 
32   auto MaybeBuffer = File->getBuffer(Stat.getName());
33   if (!MaybeBuffer)
34     return MaybeBuffer.getError();
35   auto Buffer = std::move(*MaybeBuffer);
36 
37   // If the file size changed between read and stat, pretend it didn't.
38   if (Stat.getSize() != Buffer->getBufferSize())
39     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40 
41   return TentativeEntry(Stat, std::move(Buffer));
42 }
43 
/// Returns an \c EntryRef for \p Entry under the name \p Filename, first
/// making sure the dependency directives of the entry's cached contents have
/// been computed when they are wanted.
///
/// Nothing is scanned when \p Entry is an error or a directory, when
/// \p Disable is set, or when \c shouldScanForDirectives(Filename) is false.
/// In every case the returned \c EntryRef is built from the same \p Filename
/// and \p Entry.
EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
  if (Entry.isError() || Entry.isDirectory() || Disable ||
      !shouldScanForDirectives(Filename))
    return EntryRef(Filename, Entry);

  CachedFileContents *Contents = Entry.getCachedContents();
  assert(Contents && "contents not initialized");

  // Double-checked locking, first check: done without holding `ValueLock`.
  // A non-null `DepDirectives` means a result has already been stored.
  if (Contents->DepDirectives.load())
    return EntryRef(Filename, Entry);

  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);

  // Double-checked locking, second check: repeated while holding `ValueLock`,
  // in case a result was stored between the first check and taking the lock.
  if (Contents->DepDirectives.load())
    return EntryRef(Filename, Entry);

  SmallVector<dependency_directives_scan::Directive, 64> Directives;
  // Scan the file for preprocessor directives that might affect the
  // dependencies.
  if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
                                        Contents->DepDirectiveTokens,
                                        Directives)) {
    // A true result is handled as a failed scan: discard the tokens and
    // publish an empty optional, so later calls see a non-null pointer and
    // return early instead of scanning again.
    Contents->DepDirectiveTokens.clear();
    // FIXME: Propagate the diagnostic if desired by the client.
    Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
    return EntryRef(Filename, Entry);
  }

  // This function performed double-checked locking using `DepDirectives`.
  // Assigning it must be the last thing this function does, otherwise other
  // threads may skip the critical section (`DepDirectives != nullptr`),
  // leading to a data race.
  Contents->DepDirectives.store(
      new std::optional<DependencyDirectivesTy>(std::move(Directives)));
  return EntryRef(Filename, Entry);
}
83 
84 DependencyScanningFilesystemSharedCache::
85     DependencyScanningFilesystemSharedCache() {
86   // This heuristic was chosen using a empirical testing on a
87   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
88   // sharding gives a performance edge by reducing the lock contention.
89   // FIXME: A better heuristic might also consider the OS to account for
90   // the different cost of lock contention on different OSes.
91   NumShards =
92       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
93   CacheShards = std::make_unique<CacheShard[]>(NumShards);
94 }
95 
96 DependencyScanningFilesystemSharedCache::CacheShard &
97 DependencyScanningFilesystemSharedCache::getShardForFilename(
98     StringRef Filename) const {
99   assert(llvm::sys::path::is_absolute_gnu(Filename));
100   return CacheShards[llvm::hash_value(Filename) % NumShards];
101 }
102 
103 DependencyScanningFilesystemSharedCache::CacheShard &
104 DependencyScanningFilesystemSharedCache::getShardForUID(
105     llvm::sys::fs::UniqueID UID) const {
106   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
107   return CacheShards[Hash % NumShards];
108 }
109 
110 const CachedFileSystemEntry *
111 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
112     StringRef Filename) const {
113   assert(llvm::sys::path::is_absolute_gnu(Filename));
114   std::lock_guard<std::mutex> LockGuard(CacheLock);
115   auto It = EntriesByFilename.find(Filename);
116   return It == EntriesByFilename.end() ? nullptr : It->getValue();
117 }
118 
119 const CachedFileSystemEntry *
120 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
121     llvm::sys::fs::UniqueID UID) const {
122   std::lock_guard<std::mutex> LockGuard(CacheLock);
123   auto It = EntriesByUID.find(UID);
124   return It == EntriesByUID.end() ? nullptr : It->getSecond();
125 }
126 
127 const CachedFileSystemEntry &
128 DependencyScanningFilesystemSharedCache::CacheShard::
129     getOrEmplaceEntryForFilename(StringRef Filename,
130                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
131   std::lock_guard<std::mutex> LockGuard(CacheLock);
132   auto Insertion = EntriesByFilename.insert({Filename, nullptr});
133   if (Insertion.second)
134     Insertion.first->second =
135         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
136   return *Insertion.first->second;
137 }
138 
139 const CachedFileSystemEntry &
140 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
141     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
142     std::unique_ptr<llvm::MemoryBuffer> Contents) {
143   std::lock_guard<std::mutex> LockGuard(CacheLock);
144   auto Insertion = EntriesByUID.insert({UID, nullptr});
145   if (Insertion.second) {
146     CachedFileContents *StoredContents = nullptr;
147     if (Contents)
148       StoredContents = new (ContentsStorage.Allocate())
149           CachedFileContents(std::move(Contents));
150     Insertion.first->second = new (EntryStorage.Allocate())
151         CachedFileSystemEntry(std::move(Stat), StoredContents);
152   }
153   return *Insertion.first->second;
154 }
155 
156 const CachedFileSystemEntry &
157 DependencyScanningFilesystemSharedCache::CacheShard::
158     getOrInsertEntryForFilename(StringRef Filename,
159                                 const CachedFileSystemEntry &Entry) {
160   std::lock_guard<std::mutex> LockGuard(CacheLock);
161   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
162 }
163 
164 /// Whitelist file extensions that should be minimized, treating no extension as
165 /// a source file that should be minimized.
166 ///
167 /// This is kinda hacky, it would be better if we knew what kind of file Clang
168 /// was expecting instead.
169 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
170   StringRef Ext = llvm::sys::path::extension(Filename);
171   if (Ext.empty())
172     return true; // C++ standard library
173   return llvm::StringSwitch<bool>(Ext)
174       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
175       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
176       .CasesLower(".m", ".mm", true)
177       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
178       .CasesLower(".def", ".inc", true)
179       .Default(false);
180 }
181 
182 static bool shouldCacheStatFailures(StringRef Filename) {
183   StringRef Ext = llvm::sys::path::extension(Filename);
184   if (Ext.empty())
185     return false; // This may be the module cache directory.
186   // Only cache stat failures on files that are not expected to change during
187   // the build.
188   StringRef FName = llvm::sys::path::filename(Filename);
189   if (FName == "module.modulemap" || FName == "module.map")
190     return true;
191   return shouldScanForDirectivesBasedOnExtension(Filename);
192 }
193 
/// Constructs a worker file system that proxies \p FS and keeps a reference
/// to \p SharedCache.
///
/// \c WorkingDirForCacheLookup starts out as an \c invalid_argument error and
/// is then recomputed by \c updateWorkingDirForCacheLookup().
DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
    DependencyScanningFilesystemSharedCache &SharedCache,
    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
    : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache),
      WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
  updateWorkingDirForCacheLookup();
}
201 
202 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
203     StringRef Filename) {
204   return shouldScanForDirectivesBasedOnExtension(Filename);
205 }
206 
207 const CachedFileSystemEntry &
208 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
209     TentativeEntry TEntry) {
210   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
211   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
212                                        std::move(TEntry.Status),
213                                        std::move(TEntry.Contents));
214 }
215 
216 const CachedFileSystemEntry *
217 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
218     StringRef Filename) {
219   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
220     return Entry;
221   auto &Shard = SharedCache.getShardForFilename(Filename);
222   if (const auto *Entry = Shard.findEntryByFilename(Filename))
223     return &LocalCache.insertEntryForFilename(Filename, *Entry);
224   return nullptr;
225 }
226 
227 llvm::ErrorOr<const CachedFileSystemEntry &>
228 DependencyScanningWorkerFilesystem::computeAndStoreResult(
229     StringRef OriginalFilename, StringRef FilenameForLookup) {
230   llvm::ErrorOr<llvm::vfs::Status> Stat =
231       getUnderlyingFS().status(OriginalFilename);
232   if (!Stat) {
233     if (!shouldCacheStatFailures(OriginalFilename))
234       return Stat.getError();
235     const auto &Entry =
236         getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
237     return insertLocalEntryForFilename(FilenameForLookup, Entry);
238   }
239 
240   if (const auto *Entry = findSharedEntryByUID(*Stat))
241     return insertLocalEntryForFilename(FilenameForLookup, *Entry);
242 
243   auto TEntry =
244       Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename);
245 
246   const CachedFileSystemEntry *SharedEntry = [&]() {
247     if (TEntry) {
248       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
249       return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry);
250     }
251     return &getOrEmplaceSharedEntryForFilename(FilenameForLookup,
252                                                TEntry.getError());
253   }();
254 
255   return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry);
256 }
257 
258 llvm::ErrorOr<EntryRef>
259 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
260     StringRef OriginalFilename, bool DisableDirectivesScanning) {
261   StringRef FilenameForLookup;
262   SmallString<256> PathBuf;
263   if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
264     FilenameForLookup = OriginalFilename;
265   } else if (!WorkingDirForCacheLookup) {
266     return WorkingDirForCacheLookup.getError();
267   } else {
268     StringRef RelFilename = OriginalFilename;
269     RelFilename.consume_front("./");
270     PathBuf = *WorkingDirForCacheLookup;
271     llvm::sys::path::append(PathBuf, RelFilename);
272     FilenameForLookup = PathBuf.str();
273   }
274   assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
275   if (const auto *Entry =
276           findEntryByFilenameWithWriteThrough(FilenameForLookup))
277     return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
278                                         DisableDirectivesScanning)
279         .unwrapError();
280   auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
281   if (!MaybeEntry)
282     return MaybeEntry.getError();
283   return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename,
284                                       DisableDirectivesScanning)
285       .unwrapError();
286 }
287 
288 llvm::ErrorOr<llvm::vfs::Status>
289 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
290   SmallString<256> OwnedFilename;
291   StringRef Filename = Path.toStringRef(OwnedFilename);
292 
293   if (Filename.endswith(".pcm"))
294     return getUnderlyingFS().status(Path);
295 
296   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
297   if (!Result)
298     return Result.getError();
299   return Result->getStatus();
300 }
301 
302 namespace {
303 
304 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
305 /// this subclass.
306 class DepScanFile final : public llvm::vfs::File {
307 public:
308   DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
309               llvm::vfs::Status Stat)
310       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
311 
312   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
313 
314   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
315 
316   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
317   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
318             bool IsVolatile) override {
319     return std::move(Buffer);
320   }
321 
322   std::error_code close() override { return {}; }
323 
324 private:
325   std::unique_ptr<llvm::MemoryBuffer> Buffer;
326   llvm::vfs::Status Stat;
327 };
328 
329 } // end anonymous namespace
330 
331 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
332 DepScanFile::create(EntryRef Entry) {
333   assert(!Entry.isError() && "error");
334 
335   if (Entry.isDirectory())
336     return std::make_error_code(std::errc::is_a_directory);
337 
338   auto Result = std::make_unique<DepScanFile>(
339       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
340                                        Entry.getStatus().getName(),
341                                        /*RequiresNullTerminator=*/false),
342       Entry.getStatus());
343 
344   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
345       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
346 }
347 
348 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
349 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
350   SmallString<256> OwnedFilename;
351   StringRef Filename = Path.toStringRef(OwnedFilename);
352 
353   if (Filename.endswith(".pcm"))
354     return getUnderlyingFS().openFileForRead(Path);
355 
356   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
357   if (!Result)
358     return Result.getError();
359   return DepScanFile::create(Result.get());
360 }
361 
362 std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
363     const Twine &Path) {
364   std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
365   updateWorkingDirForCacheLookup();
366   return EC;
367 }
368 
369 void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
370   llvm::ErrorOr<std::string> CWD =
371       getUnderlyingFS().getCurrentWorkingDirectory();
372   if (!CWD) {
373     WorkingDirForCacheLookup = CWD.getError();
374   } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
375     WorkingDirForCacheLookup = llvm::errc::invalid_argument;
376   } else {
377     WorkingDirForCacheLookup = *CWD;
378   }
379   assert(!WorkingDirForCacheLookup ||
380          llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
381 }
382