1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "llvm/Support/MemoryBuffer.h"
11 #include "llvm/Support/SmallVectorMemoryBuffer.h"
12 #include "llvm/Support/Threading.h"
13 
14 using namespace clang;
15 using namespace tooling;
16 using namespace dependencies;
17 
18 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
19 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
20   // Load the file and its content from the file system.
21   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
22   if (!MaybeFile)
23     return MaybeFile.getError();
24   auto File = std::move(*MaybeFile);
25 
26   auto MaybeStat = File->status();
27   if (!MaybeStat)
28     return MaybeStat.getError();
29   auto Stat = std::move(*MaybeStat);
30 
31   auto MaybeBuffer = File->getBuffer(Stat.getName());
32   if (!MaybeBuffer)
33     return MaybeBuffer.getError();
34   auto Buffer = std::move(*MaybeBuffer);
35 
36   // If the file size changed between read and stat, pretend it didn't.
37   if (Stat.getSize() != Buffer->getBufferSize())
38     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
39 
40   return TentativeEntry(Stat, std::move(Buffer));
41 }
42 
/// Lazily scans the contents of \p Entry for dependency directives and
/// returns an \c EntryRef pairing \p Filename with \p Entry.
///
/// No scanning happens when \p Entry is an error or a directory, when
/// \p Disable is set, or when \c shouldScanForDirectives rejects \p Filename.
/// Otherwise the scan runs at most once per \c CachedFileContents: the result
/// is published through \c DepDirectives, guarded by \c ValueLock.
EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
  if (Entry.isError() || Entry.isDirectory() || Disable ||
      !shouldScanForDirectives(Filename))
    return EntryRef(Filename, Entry);

  CachedFileContents *Contents = Entry.getCachedContents();
  assert(Contents && "contents not initialized");

  // Double-checked locking: first check, without holding the lock.
  if (Contents->DepDirectives.load())
    return EntryRef(Filename, Entry);

  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);

  // Double-checked locking: second check, now holding the lock, in case
  // another thread published the directives in the meantime.
  if (Contents->DepDirectives.load())
    return EntryRef(Filename, Entry);

  SmallVector<dependency_directives_scan::Directive, 64> Directives;
  // Scan the file for preprocessor directives that might affect the
  // dependencies.
  if (scanSourceForDependencyDirectives(Contents->Original->getBuffer(),
                                        Contents->DepDirectiveTokens,
                                        Directives)) {
    // A true result is handled as a failed scan: drop any tokens collected so
    // far and publish an empty optional.
    Contents->DepDirectiveTokens.clear();
    // FIXME: Propagate the diagnostic if desired by the client.
    Contents->DepDirectives.store(new Optional<DependencyDirectivesTy>());
    return EntryRef(Filename, Entry);
  }

  // This function performs double-checked locking using `DepDirectives`.
  // Assigning it must be the last thing this function does; otherwise other
  // threads may skip the critical section (`DepDirectives != nullptr`),
  // leading to a data race.
  Contents->DepDirectives.store(
      new Optional<DependencyDirectivesTy>(std::move(Directives)));
  return EntryRef(Filename, Entry);
}
82 
83 DependencyScanningFilesystemSharedCache::
84     DependencyScanningFilesystemSharedCache() {
85   // This heuristic was chosen using a empirical testing on a
86   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
87   // sharding gives a performance edge by reducing the lock contention.
88   // FIXME: A better heuristic might also consider the OS to account for
89   // the different cost of lock contention on different OSes.
90   NumShards =
91       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
92   CacheShards = std::make_unique<CacheShard[]>(NumShards);
93 }
94 
95 DependencyScanningFilesystemSharedCache::CacheShard &
96 DependencyScanningFilesystemSharedCache::getShardForFilename(
97     StringRef Filename) const {
98   return CacheShards[llvm::hash_value(Filename) % NumShards];
99 }
100 
101 DependencyScanningFilesystemSharedCache::CacheShard &
102 DependencyScanningFilesystemSharedCache::getShardForUID(
103     llvm::sys::fs::UniqueID UID) const {
104   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
105   return CacheShards[Hash % NumShards];
106 }
107 
108 const CachedFileSystemEntry *
109 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
110     StringRef Filename) const {
111   std::lock_guard<std::mutex> LockGuard(CacheLock);
112   auto It = EntriesByFilename.find(Filename);
113   return It == EntriesByFilename.end() ? nullptr : It->getValue();
114 }
115 
116 const CachedFileSystemEntry *
117 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
118     llvm::sys::fs::UniqueID UID) const {
119   std::lock_guard<std::mutex> LockGuard(CacheLock);
120   auto It = EntriesByUID.find(UID);
121   return It == EntriesByUID.end() ? nullptr : It->getSecond();
122 }
123 
124 const CachedFileSystemEntry &
125 DependencyScanningFilesystemSharedCache::CacheShard::
126     getOrEmplaceEntryForFilename(StringRef Filename,
127                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
128   std::lock_guard<std::mutex> LockGuard(CacheLock);
129   auto Insertion = EntriesByFilename.insert({Filename, nullptr});
130   if (Insertion.second)
131     Insertion.first->second =
132         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
133   return *Insertion.first->second;
134 }
135 
136 const CachedFileSystemEntry &
137 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
138     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
139     std::unique_ptr<llvm::MemoryBuffer> Contents) {
140   std::lock_guard<std::mutex> LockGuard(CacheLock);
141   auto Insertion = EntriesByUID.insert({UID, nullptr});
142   if (Insertion.second) {
143     CachedFileContents *StoredContents = nullptr;
144     if (Contents)
145       StoredContents = new (ContentsStorage.Allocate())
146           CachedFileContents(std::move(Contents));
147     Insertion.first->second = new (EntryStorage.Allocate())
148         CachedFileSystemEntry(std::move(Stat), StoredContents);
149   }
150   return *Insertion.first->second;
151 }
152 
153 const CachedFileSystemEntry &
154 DependencyScanningFilesystemSharedCache::CacheShard::
155     getOrInsertEntryForFilename(StringRef Filename,
156                                 const CachedFileSystemEntry &Entry) {
157   std::lock_guard<std::mutex> LockGuard(CacheLock);
158   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
159 }
160 
161 /// Whitelist file extensions that should be minimized, treating no extension as
162 /// a source file that should be minimized.
163 ///
164 /// This is kinda hacky, it would be better if we knew what kind of file Clang
165 /// was expecting instead.
166 static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
167   StringRef Ext = llvm::sys::path::extension(Filename);
168   if (Ext.empty())
169     return true; // C++ standard library
170   return llvm::StringSwitch<bool>(Ext)
171       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
172       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
173       .CasesLower(".m", ".mm", true)
174       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
175       .CasesLower(".def", ".inc", true)
176       .Default(false);
177 }
178 
179 static bool shouldCacheStatFailures(StringRef Filename) {
180   StringRef Ext = llvm::sys::path::extension(Filename);
181   if (Ext.empty())
182     return false; // This may be the module cache directory.
183   // Only cache stat failures on source files.
184   return shouldScanForDirectivesBasedOnExtension(Filename);
185 }
186 
187 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
188     StringRef Filename) {
189   return shouldScanForDirectivesBasedOnExtension(Filename);
190 }
191 
192 const CachedFileSystemEntry &
193 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
194     TentativeEntry TEntry) {
195   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
196   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
197                                        std::move(TEntry.Status),
198                                        std::move(TEntry.Contents));
199 }
200 
201 const CachedFileSystemEntry *
202 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
203     StringRef Filename) {
204   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
205     return Entry;
206   auto &Shard = SharedCache.getShardForFilename(Filename);
207   if (const auto *Entry = Shard.findEntryByFilename(Filename))
208     return &LocalCache.insertEntryForFilename(Filename, *Entry);
209   return nullptr;
210 }
211 
212 llvm::ErrorOr<const CachedFileSystemEntry &>
213 DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
214   llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
215   if (!Stat) {
216     if (!shouldCacheStatFailures(Filename))
217       return Stat.getError();
218     const auto &Entry =
219         getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
220     return insertLocalEntryForFilename(Filename, Entry);
221   }
222 
223   if (const auto *Entry = findSharedEntryByUID(*Stat))
224     return insertLocalEntryForFilename(Filename, *Entry);
225 
226   auto TEntry =
227       Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
228 
229   const CachedFileSystemEntry *SharedEntry = [&]() {
230     if (TEntry) {
231       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
232       return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
233     }
234     return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
235   }();
236 
237   return insertLocalEntryForFilename(Filename, *SharedEntry);
238 }
239 
240 llvm::ErrorOr<EntryRef>
241 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
242     StringRef Filename, bool DisableDirectivesScanning) {
243   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
244     return scanForDirectivesIfNecessary(*Entry, Filename,
245                                         DisableDirectivesScanning)
246         .unwrapError();
247   auto MaybeEntry = computeAndStoreResult(Filename);
248   if (!MaybeEntry)
249     return MaybeEntry.getError();
250   return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
251                                       DisableDirectivesScanning)
252       .unwrapError();
253 }
254 
255 llvm::ErrorOr<llvm::vfs::Status>
256 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
257   SmallString<256> OwnedFilename;
258   StringRef Filename = Path.toStringRef(OwnedFilename);
259 
260   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
261   if (!Result)
262     return Result.getError();
263   return Result->getStatus();
264 }
265 
266 namespace {
267 
268 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
269 /// this subclass.
270 class DepScanFile final : public llvm::vfs::File {
271 public:
272   DepScanFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
273               llvm::vfs::Status Stat)
274       : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
275 
276   static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> create(EntryRef Entry);
277 
278   llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
279 
280   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
281   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
282             bool IsVolatile) override {
283     return std::move(Buffer);
284   }
285 
286   std::error_code close() override { return {}; }
287 
288 private:
289   std::unique_ptr<llvm::MemoryBuffer> Buffer;
290   llvm::vfs::Status Stat;
291 };
292 
293 } // end anonymous namespace
294 
295 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
296 DepScanFile::create(EntryRef Entry) {
297   assert(!Entry.isError() && "error");
298 
299   if (Entry.isDirectory())
300     return std::make_error_code(std::errc::is_a_directory);
301 
302   auto Result = std::make_unique<DepScanFile>(
303       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
304                                        Entry.getStatus().getName(),
305                                        /*RequiresNullTerminator=*/false),
306       Entry.getStatus());
307 
308   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
309       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
310 }
311 
312 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
313 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
314   SmallString<256> OwnedFilename;
315   StringRef Filename = Path.toStringRef(OwnedFilename);
316 
317   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
318   if (!Result)
319     return Result.getError();
320   return DepScanFile::create(Result.get());
321 }
322