1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownload* family of
/// functions. For the server, this file defines the DebuginfodLogEntry and
/// DebuginfodServer structs, as well as the DebuginfodLog and
/// DebuginfodCollection classes. The client functions retrieve any of the three
/// supported artifact types (executable, debuginfo, source file) associated
/// with a build-id from debuginfod servers. If a source file is to be fetched,
/// its absolute path must be passed as the SourceFilePath argument to
/// getCachedOrDownloadSource. The DebuginfodLogEntry, DebuginfodLog, and
/// DebuginfodCollection are used by the DebuginfodServer to scan the local
/// filesystem for binaries and serve the debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <thread>
45 
46 namespace llvm {
47 
48 using llvm::object::BuildIDRef;
49 
50 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
51 
52 // Returns a binary BuildID as a normalized hex string.
53 // Uses lowercase for compatibility with common debuginfod servers.
54 static std::string buildIDToString(BuildIDRef ID) {
55   return llvm::toHex(ID, /*LowerCase=*/true);
56 }
57 
58 bool canUseDebuginfod() {
59   return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
60 }
61 
62 SmallVector<StringRef> getDefaultDebuginfodUrls() {
63   const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
64   if (DebuginfodUrlsEnv == nullptr)
65     return SmallVector<StringRef>();
66 
67   SmallVector<StringRef> DebuginfodUrls;
68   StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
69   return DebuginfodUrls;
70 }
71 
72 /// Finds a default local file caching directory for the debuginfod client,
73 /// first checking DEBUGINFOD_CACHE_PATH.
74 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
75   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
76     return CacheDirectoryEnv;
77 
78   SmallString<64> CacheDirectory;
79   if (!sys::path::cache_directory(CacheDirectory))
80     return createStringError(
81         errc::io_error, "Unable to determine appropriate cache directory.");
82   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
83   return std::string(CacheDirectory);
84 }
85 
86 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
87   long Timeout;
88   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
89   if (DebuginfodTimeoutEnv &&
90       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
91     return std::chrono::milliseconds(Timeout * 1000);
92 
93   return std::chrono::milliseconds(90 * 1000);
94 }
95 
96 /// The following functions fetch a debuginfod artifact to a file in a local
97 /// cache and return the cached file path. They first search the local cache,
98 /// followed by the debuginfod servers.
99 
100 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
101                                                 StringRef SourceFilePath) {
102   SmallString<64> UrlPath;
103   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
104                     buildIDToString(ID), "source",
105                     sys::path::convert_to_slash(SourceFilePath));
106   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
107 }
108 
109 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
110   SmallString<64> UrlPath;
111   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
112                     buildIDToString(ID), "executable");
113   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
114 }
115 
116 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
117   SmallString<64> UrlPath;
118   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
119                     buildIDToString(ID), "debuginfo");
120   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
121 }
122 
123 // General fetching function.
124 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
125                                                   StringRef UrlPath) {
126   SmallString<10> CacheDir;
127 
128   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
129   if (!CacheDirOrErr)
130     return CacheDirOrErr.takeError();
131   CacheDir = *CacheDirOrErr;
132 
133   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
134                                      getDefaultDebuginfodUrls(),
135                                      getDefaultDebuginfodTimeout());
136 }
137 
138 namespace {
139 
140 /// A simple handler which streams the returned data to a cache file. The cache
141 /// file is only created if a 200 OK status is observed.
142 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
143   using CreateStreamFn =
144       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
145   CreateStreamFn CreateStream;
146   HTTPClient &Client;
147   std::unique_ptr<CachedFileStream> FileStream;
148 
149 public:
150   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
151       : CreateStream(CreateStream), Client(Client) {}
152   virtual ~StreamedHTTPResponseHandler() = default;
153 
154   Error handleBodyChunk(StringRef BodyChunk) override;
155 };
156 
157 } // namespace
158 
159 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
160   if (!FileStream) {
161     unsigned Code = Client.responseCode();
162     if (Code && Code != 200)
163       return Error::success();
164     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
165         CreateStream();
166     if (!FileStreamOrError)
167       return FileStreamOrError.takeError();
168     FileStream = std::move(*FileStreamOrError);
169   }
170   *FileStream->OS << BodyChunk;
171   return Error::success();
172 }
173 
174 // An over-accepting simplification of the HTTP RFC 7230 spec.
175 static bool isHeader(StringRef S) {
176   StringRef Name;
177   StringRef Value;
178   std::tie(Name, Value) = S.split(':');
179   if (Name.empty() || Value.empty())
180     return false;
181   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
182          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
183 }
184 
185 static SmallVector<std::string, 0> getHeaders() {
186   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
187   if (!Filename)
188     return {};
189   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
190       MemoryBuffer::getFile(Filename, /*IsText=*/true);
191   if (!HeadersFile)
192     return {};
193 
194   SmallVector<std::string, 0> Headers;
195   uint64_t LineNumber = 0;
196   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
197     LineNumber++;
198     if (!Line.empty() && Line.back() == '\r')
199       Line = Line.drop_back();
200     if (!isHeader(Line)) {
201       if (!all_of(Line, llvm::isSpace))
202         WithColor::warning()
203             << "could not parse debuginfod header: " << Filename << ':'
204             << LineNumber << '\n';
205       continue;
206     }
207     Headers.emplace_back(Line);
208   }
209   return Headers;
210 }
211 
212 Expected<std::string> getCachedOrDownloadArtifact(
213     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
214     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
215   SmallString<64> AbsCachedArtifactPath;
216   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
217                     "llvmcache-" + UniqueKey);
218 
219   Expected<FileCache> CacheOrErr =
220       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
221   if (!CacheOrErr)
222     return CacheOrErr.takeError();
223 
224   FileCache Cache = *CacheOrErr;
225   // We choose an arbitrary Task parameter as we do not make use of it.
226   unsigned Task = 0;
227   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
228   if (!CacheAddStreamOrErr)
229     return CacheAddStreamOrErr.takeError();
230   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
231   if (!CacheAddStream)
232     return std::string(AbsCachedArtifactPath);
233   // The artifact was not found in the local cache, query the debuginfod
234   // servers.
235   if (!HTTPClient::isAvailable())
236     return createStringError(errc::io_error,
237                              "No working HTTP client is available.");
238 
239   if (!HTTPClient::IsInitialized)
240     return createStringError(
241         errc::io_error,
242         "A working HTTP client is available, but it is not initialized. To "
243         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
244         "at the beginning of main.");
245 
246   HTTPClient Client;
247   Client.setTimeout(Timeout);
248   for (StringRef ServerUrl : DebuginfodUrls) {
249     SmallString<64> ArtifactUrl;
250     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
251 
252     // Perform the HTTP request and if successful, write the response body to
253     // the cache.
254     StreamedHTTPResponseHandler Handler(
255         [&]() { return CacheAddStream(Task, ""); }, Client);
256     HTTPRequest Request(ArtifactUrl);
257     Request.Headers = getHeaders();
258     Error Err = Client.perform(Request, Handler);
259     if (Err)
260       return std::move(Err);
261 
262     unsigned Code = Client.responseCode();
263     if (Code && Code != 200)
264       continue;
265 
266     // Return the path to the artifact on disk.
267     return std::string(AbsCachedArtifactPath);
268   }
269 
270   return createStringError(errc::argument_out_of_domain, "build id not found");
271 }
272 
273 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
274     : Message(Message.str()) {}
275 
276 void DebuginfodLog::push(const Twine &Message) {
277   push(DebuginfodLogEntry(Message));
278 }
279 
280 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
281   {
282     std::lock_guard<std::mutex> Guard(QueueMutex);
283     LogEntryQueue.push(Entry);
284   }
285   QueueCondition.notify_one();
286 }
287 
288 DebuginfodLogEntry DebuginfodLog::pop() {
289   {
290     std::unique_lock<std::mutex> Guard(QueueMutex);
291     // Wait for messages to be pushed into the queue.
292     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
293   }
294   std::lock_guard<std::mutex> Guard(QueueMutex);
295   if (!LogEntryQueue.size())
296     llvm_unreachable("Expected message in the queue.");
297 
298   DebuginfodLogEntry Entry = LogEntryQueue.front();
299   LogEntryQueue.pop();
300   return Entry;
301 }
302 
303 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
304                                            DebuginfodLog &Log, ThreadPool &Pool,
305                                            double MinInterval)
306     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
307   for (StringRef Path : PathsRef)
308     Paths.push_back(Path.str());
309 }
310 
311 Error DebuginfodCollection::update() {
312   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
313   if (UpdateTimer.isRunning())
314     UpdateTimer.stopTimer();
315   UpdateTimer.clear();
316   for (const std::string &Path : Paths) {
317     Log.push("Updating binaries at path " + Path);
318     if (Error Err = findBinaries(Path))
319       return Err;
320   }
321   Log.push("Updated collection");
322   UpdateTimer.startTimer();
323   return Error::success();
324 }
325 
326 Expected<bool> DebuginfodCollection::updateIfStale() {
327   if (!UpdateTimer.isRunning())
328     return false;
329   UpdateTimer.stopTimer();
330   double Time = UpdateTimer.getTotalTime().getWallTime();
331   UpdateTimer.startTimer();
332   if (Time < MinInterval)
333     return false;
334   if (Error Err = update())
335     return std::move(Err);
336   return true;
337 }
338 
339 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
340   while (true) {
341     if (Error Err = update())
342       return Err;
343     std::this_thread::sleep_for(Interval);
344   }
345   llvm_unreachable("updateForever loop should never end");
346 }
347 
348 static bool hasELFMagic(StringRef FilePath) {
349   file_magic Type;
350   std::error_code EC = identify_magic(FilePath, Type);
351   if (EC)
352     return false;
353   switch (Type) {
354   case file_magic::elf:
355   case file_magic::elf_relocatable:
356   case file_magic::elf_executable:
357   case file_magic::elf_shared_object:
358   case file_magic::elf_core:
359     return true;
360   default:
361     return false;
362   }
363 }
364 
365 Error DebuginfodCollection::findBinaries(StringRef Path) {
366   std::error_code EC;
367   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
368   std::mutex IteratorMutex;
369   ThreadPoolTaskGroup IteratorGroup(Pool);
370   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
371        WorkerIndex++) {
372     IteratorGroup.async([&, this]() -> void {
373       std::string FilePath;
374       while (true) {
375         {
376           // Check if iteration is over or there is an error during iteration
377           std::lock_guard<std::mutex> Guard(IteratorMutex);
378           if (I == E || EC)
379             return;
380           // Grab a file path from the directory iterator and advance the
381           // iterator.
382           FilePath = I->path();
383           I.increment(EC);
384         }
385 
386         // Inspect the file at this path to determine if it is debuginfo.
387         if (!hasELFMagic(FilePath))
388           continue;
389 
390         Expected<object::OwningBinary<object::Binary>> BinOrErr =
391             object::createBinary(FilePath);
392 
393         if (!BinOrErr) {
394           consumeError(BinOrErr.takeError());
395           continue;
396         }
397         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
398         if (!Bin->isObject())
399           continue;
400 
401         // TODO: Support non-ELF binaries
402         object::ELFObjectFileBase *Object =
403             dyn_cast<object::ELFObjectFileBase>(Bin);
404         if (!Object)
405           continue;
406 
407         std::optional<BuildIDRef> ID = getBuildID(Object);
408         if (!ID)
409           continue;
410 
411         std::string IDString = buildIDToString(*ID);
412         if (Object->hasDebugInfo()) {
413           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
414           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
415         } else {
416           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
417           (void)Binaries.try_emplace(IDString, std::move(FilePath));
418         }
419       }
420     });
421   }
422   IteratorGroup.wait();
423   std::unique_lock<std::mutex> Guard(IteratorMutex);
424   if (EC)
425     return errorCodeToError(EC);
426   return Error::success();
427 }
428 
429 Expected<std::optional<std::string>>
430 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
431   Log.push("getting binary path of ID " + buildIDToString(ID));
432   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
433   auto Loc = Binaries.find(buildIDToString(ID));
434   if (Loc != Binaries.end()) {
435     std::string Path = Loc->getValue();
436     return Path;
437   }
438   return std::nullopt;
439 }
440 
441 Expected<std::optional<std::string>>
442 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
443   Log.push("getting debug binary path of ID " + buildIDToString(ID));
444   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
445   auto Loc = DebugBinaries.find(buildIDToString(ID));
446   if (Loc != DebugBinaries.end()) {
447     std::string Path = Loc->getValue();
448     return Path;
449   }
450   return std::nullopt;
451 }
452 
453 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
454   {
455     // Check collection; perform on-demand update if stale.
456     Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
457     if (!PathOrErr)
458       return PathOrErr.takeError();
459     std::optional<std::string> Path = *PathOrErr;
460     if (!Path) {
461       Expected<bool> UpdatedOrErr = updateIfStale();
462       if (!UpdatedOrErr)
463         return UpdatedOrErr.takeError();
464       if (*UpdatedOrErr) {
465         // Try once more.
466         PathOrErr = getBinaryPath(ID);
467         if (!PathOrErr)
468           return PathOrErr.takeError();
469         Path = *PathOrErr;
470       }
471     }
472     if (Path)
473       return *Path;
474   }
475 
476   // Try federation.
477   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
478   if (!PathOrErr)
479     consumeError(PathOrErr.takeError());
480 
481   // Fall back to debug binary.
482   return findDebugBinaryPath(ID);
483 }
484 
485 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
486   // Check collection; perform on-demand update if stale.
487   Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
488   if (!PathOrErr)
489     return PathOrErr.takeError();
490   std::optional<std::string> Path = *PathOrErr;
491   if (!Path) {
492     Expected<bool> UpdatedOrErr = updateIfStale();
493     if (!UpdatedOrErr)
494       return UpdatedOrErr.takeError();
495     if (*UpdatedOrErr) {
496       // Try once more.
497       PathOrErr = getBinaryPath(ID);
498       if (!PathOrErr)
499         return PathOrErr.takeError();
500       Path = *PathOrErr;
501     }
502   }
503   if (Path)
504     return *Path;
505 
506   // Try federation.
507   return getCachedOrDownloadDebuginfo(ID);
508 }
509 
510 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
511                                    DebuginfodCollection &Collection)
512     : Log(Log), Collection(Collection) {
513   cantFail(
514       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
515         Log.push("GET " + Request.UrlPath);
516         std::string IDString;
517         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
518           Request.setResponse(
519               {404, "text/plain", "Build ID is not a hex string\n"});
520           return;
521         }
522         object::BuildID ID(IDString.begin(), IDString.end());
523         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
524         if (Error Err = PathOrErr.takeError()) {
525           consumeError(std::move(Err));
526           Request.setResponse({404, "text/plain", "Build ID not found\n"});
527           return;
528         }
529         streamFile(Request, *PathOrErr);
530       }));
531   cantFail(
532       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
533         Log.push("GET " + Request.UrlPath);
534         std::string IDString;
535         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
536           Request.setResponse(
537               {404, "text/plain", "Build ID is not a hex string\n"});
538           return;
539         }
540         object::BuildID ID(IDString.begin(), IDString.end());
541         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
542         if (Error Err = PathOrErr.takeError()) {
543           consumeError(std::move(Err));
544           Request.setResponse({404, "text/plain", "Build ID not found\n"});
545           return;
546         }
547         streamFile(Request, *PathOrErr);
548       }));
549 }
550 
551 } // namespace llvm
552