1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
10 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
11 #include "llvm/Support/MemoryBuffer.h"
12 #include "llvm/Support/SmallVectorMemoryBuffer.h"
13 #include "llvm/Support/Threading.h"
14 
15 using namespace clang;
16 using namespace tooling;
17 using namespace dependencies;
18 
19 llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
20 DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
21   // Load the file and its content from the file system.
22   auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
23   if (!MaybeFile)
24     return MaybeFile.getError();
25   auto File = std::move(*MaybeFile);
26 
27   auto MaybeStat = File->status();
28   if (!MaybeStat)
29     return MaybeStat.getError();
30   auto Stat = std::move(*MaybeStat);
31 
32   auto MaybeBuffer = File->getBuffer(Stat.getName());
33   if (!MaybeBuffer)
34     return MaybeBuffer.getError();
35   auto Buffer = std::move(*MaybeBuffer);
36 
37   // If the file size changed between read and stat, pretend it didn't.
38   if (Stat.getSize() != Buffer->getBufferSize())
39     Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
40 
41   return TentativeEntry(Stat, std::move(Buffer));
42 }
43 
EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
  // Errors and directories have no contents to minimize. Minimization can also
  // be disabled explicitly by the caller or via the `NotToBeMinimized` set
  // (consulted by `shouldMinimize`).
  if (Entry.isError() || Entry.isDirectory() || Disable ||
      !shouldMinimize(Filename, Entry.getUniqueID()))
    return EntryRef(/*Minimized=*/false, Filename, Entry);

  CachedFileContents *Contents = Entry.getContents();
  assert(Contents && "contents not initialized");

  // Double-checked locking: a non-null `MinimizedAccess` means some thread
  // already finished minimizing these contents, so we can return lock-free.
  if (Contents->MinimizedAccess.load())
    return EntryRef(/*Minimized=*/true, Filename, Entry);

  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);

  // Re-check under the lock: another thread may have won the race while we
  // were waiting to acquire it.
  if (Contents->MinimizedAccess.load())
    return EntryRef(/*Minimized=*/true, Filename, Entry);

  llvm::SmallString<1024> MinimizedFileContents;
  // Minimize the file down to directives that might affect the dependencies.
  SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
  if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
                                           MinimizedFileContents, Tokens)) {
    // FIXME: Propagate the diagnostic if desired by the client.
    // Use the original file if the minimization failed.
    Contents->MinimizedStorage =
        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
    return EntryRef(/*Minimized=*/true, Filename, Entry);
  }

  // The contents produced by the minimizer must be null terminated.
  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
         "not null terminated contents");

  // Compute the skipped PP ranges that speedup skipping over inactive
  // preprocessor blocks.
  llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
      SkippedRanges;
  minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
                                                                 SkippedRanges);
  PreprocessorSkippedRangeMapping Mapping;
  for (const auto &Range : SkippedRanges) {
    if (Range.Length < 16) {
      // Ignore small ranges as non-profitable.
      // FIXME: This is a heuristic, its worth investigating the tradeoffs
      // when it should be applied.
      continue;
    }
    Mapping[Range.Offset] = Range.Length;
  }
  Contents->PPSkippedRangeMapping = std::move(Mapping);

  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
      std::move(MinimizedFileContents));
  // This function performed double-checked locking using `MinimizedAccess`.
  // Assigning it must be the last thing this function does. If we were to
  // assign it before `PPSkippedRangeMapping`, other threads may skip the
  // critical section (`MinimizedAccess != nullptr`) and access the mappings
  // that are about to be initialized, leading to a data race.
  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
  return EntryRef(/*Minimized=*/true, Filename, Entry);
}
108 
109 DependencyScanningFilesystemSharedCache::
110     DependencyScanningFilesystemSharedCache() {
111   // This heuristic was chosen using a empirical testing on a
112   // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
113   // sharding gives a performance edge by reducing the lock contention.
114   // FIXME: A better heuristic might also consider the OS to account for
115   // the different cost of lock contention on different OSes.
116   NumShards =
117       std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4);
118   CacheShards = std::make_unique<CacheShard[]>(NumShards);
119 }
120 
121 DependencyScanningFilesystemSharedCache::CacheShard &
122 DependencyScanningFilesystemSharedCache::getShardForFilename(
123     StringRef Filename) const {
124   return CacheShards[llvm::hash_value(Filename) % NumShards];
125 }
126 
127 DependencyScanningFilesystemSharedCache::CacheShard &
128 DependencyScanningFilesystemSharedCache::getShardForUID(
129     llvm::sys::fs::UniqueID UID) const {
130   auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
131   return CacheShards[Hash % NumShards];
132 }
133 
134 const CachedFileSystemEntry *
135 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
136     StringRef Filename) const {
137   std::lock_guard<std::mutex> LockGuard(CacheLock);
138   auto It = EntriesByFilename.find(Filename);
139   return It == EntriesByFilename.end() ? nullptr : It->getValue();
140 }
141 
142 const CachedFileSystemEntry *
143 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
144     llvm::sys::fs::UniqueID UID) const {
145   std::lock_guard<std::mutex> LockGuard(CacheLock);
146   auto It = EntriesByUID.find(UID);
147   return It == EntriesByUID.end() ? nullptr : It->getSecond();
148 }
149 
150 const CachedFileSystemEntry &
151 DependencyScanningFilesystemSharedCache::CacheShard::
152     getOrEmplaceEntryForFilename(StringRef Filename,
153                                  llvm::ErrorOr<llvm::vfs::Status> Stat) {
154   std::lock_guard<std::mutex> LockGuard(CacheLock);
155   auto Insertion = EntriesByFilename.insert({Filename, nullptr});
156   if (Insertion.second)
157     Insertion.first->second =
158         new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
159   return *Insertion.first->second;
160 }
161 
162 const CachedFileSystemEntry &
163 DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
164     llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
165     std::unique_ptr<llvm::MemoryBuffer> Contents) {
166   std::lock_guard<std::mutex> LockGuard(CacheLock);
167   auto Insertion = EntriesByUID.insert({UID, nullptr});
168   if (Insertion.second) {
169     CachedFileContents *StoredContents = nullptr;
170     if (Contents)
171       StoredContents = new (ContentsStorage.Allocate())
172           CachedFileContents(std::move(Contents));
173     Insertion.first->second = new (EntryStorage.Allocate())
174         CachedFileSystemEntry(std::move(Stat), StoredContents);
175   }
176   return *Insertion.first->second;
177 }
178 
179 const CachedFileSystemEntry &
180 DependencyScanningFilesystemSharedCache::CacheShard::
181     getOrInsertEntryForFilename(StringRef Filename,
182                                 const CachedFileSystemEntry &Entry) {
183   std::lock_guard<std::mutex> LockGuard(CacheLock);
184   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
185 }
186 
187 /// Whitelist file extensions that should be minimized, treating no extension as
188 /// a source file that should be minimized.
189 ///
190 /// This is kinda hacky, it would be better if we knew what kind of file Clang
191 /// was expecting instead.
192 static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
193   StringRef Ext = llvm::sys::path::extension(Filename);
194   if (Ext.empty())
195     return true; // C++ standard library
196   return llvm::StringSwitch<bool>(Ext)
197       .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
198       .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
199       .CasesLower(".m", ".mm", true)
200       .CasesLower(".i", ".ii", ".mi", ".mmi", true)
201       .CasesLower(".def", ".inc", true)
202       .Default(false);
203 }
204 
205 static bool shouldCacheStatFailures(StringRef Filename) {
206   StringRef Ext = llvm::sys::path::extension(Filename);
207   if (Ext.empty())
208     return false; // This may be the module cache directory.
209   // Only cache stat failures on source files.
210   return shouldMinimizeBasedOnExtension(Filename);
211 }
212 
213 void DependencyScanningWorkerFilesystem::disableMinimization(
214     StringRef Filename) {
215   // Since we're not done setting up `NotToBeMinimized` yet, we need to disable
216   // minimization explicitly.
217   if (llvm::ErrorOr<EntryRef> Result =
218           getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true))
219     NotToBeMinimized.insert(Result->getStatus().getUniqueID());
220 }
221 
222 bool DependencyScanningWorkerFilesystem::shouldMinimize(
223     StringRef Filename, llvm::sys::fs::UniqueID UID) {
224   return shouldMinimizeBasedOnExtension(Filename) &&
225          !NotToBeMinimized.contains(UID);
226 }
227 
228 const CachedFileSystemEntry &
229 DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
230     TentativeEntry TEntry) {
231   auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
232   return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
233                                        std::move(TEntry.Status),
234                                        std::move(TEntry.Contents));
235 }
236 
237 const CachedFileSystemEntry *
238 DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
239     StringRef Filename) {
240   if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
241     return Entry;
242   auto &Shard = SharedCache.getShardForFilename(Filename);
243   if (const auto *Entry = Shard.findEntryByFilename(Filename))
244     return &LocalCache.insertEntryForFilename(Filename, *Entry);
245   return nullptr;
246 }
247 
llvm::ErrorOr<const CachedFileSystemEntry &>
DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
  // Compute the entry for a path not yet in either cache, storing the result
  // in the shared cache and writing it through to the local cache.
  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
  if (!Stat) {
    // Cache the stat failure only for source-like paths; others (e.g. the
    // module cache directory) may start existing mid-scan.
    if (!shouldCacheStatFailures(Filename))
      return Stat.getError();
    const auto &Entry =
        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
    return insertLocalEntryForFilename(Filename, Entry);
  }

  // Another filename may have already populated an entry for this UID
  // (presumably a different path to the same file); reuse it rather than
  // re-reading the contents.
  if (const auto *Entry = findSharedEntryByUID(*Stat))
    return insertLocalEntryForFilename(Filename, *Entry);

  // Directories carry only a status; regular files are read eagerly.
  auto TEntry =
      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);

  const CachedFileSystemEntry *SharedEntry = [&]() {
    if (TEntry) {
      // Insert (or find) the canonical per-UID entry, then associate this
      // filename with it in the shared cache.
      const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
    }
    // Reading the file failed: remember the error under this filename.
    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
  }();

  // Write through to the local cache so subsequent lookups are lock-free.
  return insertLocalEntryForFilename(Filename, *SharedEntry);
}
275 
276 llvm::ErrorOr<EntryRef>
277 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
278     StringRef Filename, bool DisableMinimization) {
279   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
280     return minimizeIfNecessary(*Entry, Filename, DisableMinimization)
281         .unwrapError();
282   auto MaybeEntry = computeAndStoreResult(Filename);
283   if (!MaybeEntry)
284     return MaybeEntry.getError();
285   return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization)
286       .unwrapError();
287 }
288 
289 llvm::ErrorOr<llvm::vfs::Status>
290 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
291   SmallString<256> OwnedFilename;
292   StringRef Filename = Path.toStringRef(OwnedFilename);
293 
294   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
295   if (!Result)
296     return Result.getError();
297   return Result->getStatus();
298 }
299 
namespace {

/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
/// this subclass.
class MinimizedVFSFile final : public llvm::vfs::File {
public:
  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
                   llvm::vfs::Status Stat)
      : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}

  /// Create a \c MinimizedVFSFile for \p Entry, registering its skipped-PP
  /// range mapping (if any) with \p PPSkipMappings.
  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
  create(EntryRef Entry,
         ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);

  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }

  // NOTE: This moves `Buffer` out of the file, so it only returns useful
  // contents on the first call.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
            bool IsVolatile) override {
    return std::move(Buffer);
  }

  std::error_code close() override { return {}; }

private:
  std::unique_ptr<llvm::MemoryBuffer> Buffer;
  llvm::vfs::Status Stat;
};

} // end anonymous namespace
330 
331 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
332     EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
333   assert(!Entry.isError() && "error");
334 
335   if (Entry.isDirectory())
336     return std::make_error_code(std::errc::is_a_directory);
337 
338   auto Result = std::make_unique<MinimizedVFSFile>(
339       llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
340                                        Entry.getStatus().getName(),
341                                        /*RequiresNullTerminator=*/false),
342       Entry.getStatus());
343 
344   const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
345   if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings)
346     (*PPSkipMappings)[Result->Buffer->getBufferStart()] = EntrySkipMappings;
347 
348   return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
349       std::unique_ptr<llvm::vfs::File>(std::move(Result)));
350 }
351 
352 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
353 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
354   SmallString<256> OwnedFilename;
355   StringRef Filename = Path.toStringRef(OwnedFilename);
356 
357   llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
358   if (!Result)
359     return Result.getError();
360   return MinimizedVFSFile::create(Result.get(), PPSkipMappings);
361 }
362