1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownload* family of
/// functions. For the server, this file defines the DebuginfodLogEntry and
/// DebuginfodServer structs, as well as the DebuginfodLog and
/// DebuginfodCollection classes. The client functions retrieve any of the three
/// supported artifact types (executable, debuginfo, source file) associated
/// with a build-id from debuginfod servers. If a source file is to be fetched,
/// its absolute path must be specified in the SourceFilePath argument to
/// getCachedOrDownloadSource. The DebuginfodLogEntry, DebuginfodLog, and
/// DebuginfodCollection are used by the DebuginfodServer to scan the local
/// filesystem for binaries and serve the debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <thread>
45 
46 namespace llvm {
47 
48 using llvm::object::BuildIDRef;
49 
50 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
51 
52 // Returns a binary BuildID as a normalized hex string.
53 // Uses lowercase for compatibility with common debuginfod servers.
54 static std::string buildIDToString(BuildIDRef ID) {
55   return llvm::toHex(ID, /*LowerCase=*/true);
56 }
57 
58 bool canUseDebuginfod() {
59   return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
60 }
61 
62 SmallVector<StringRef> getDefaultDebuginfodUrls() {
63   const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
64   if (DebuginfodUrlsEnv == nullptr)
65     return SmallVector<StringRef>();
66 
67   SmallVector<StringRef> DebuginfodUrls;
68   StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
69   return DebuginfodUrls;
70 }
71 
72 /// Finds a default local file caching directory for the debuginfod client,
73 /// first checking DEBUGINFOD_CACHE_PATH.
74 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
75   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
76     return CacheDirectoryEnv;
77 
78   SmallString<64> CacheDirectory;
79   if (!sys::path::cache_directory(CacheDirectory))
80     return createStringError(
81         errc::io_error, "Unable to determine appropriate cache directory.");
82   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
83   return std::string(CacheDirectory);
84 }
85 
86 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
87   long Timeout;
88   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
89   if (DebuginfodTimeoutEnv &&
90       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
91     return std::chrono::milliseconds(Timeout * 1000);
92 
93   return std::chrono::milliseconds(90 * 1000);
94 }
95 
96 /// The following functions fetch a debuginfod artifact to a file in a local
97 /// cache and return the cached file path. They first search the local cache,
98 /// followed by the debuginfod servers.
99 
100 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
101                                                 StringRef SourceFilePath) {
102   SmallString<64> UrlPath;
103   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
104                     buildIDToString(ID), "source",
105                     sys::path::convert_to_slash(SourceFilePath));
106   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
107 }
108 
109 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
110   SmallString<64> UrlPath;
111   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
112                     buildIDToString(ID), "executable");
113   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
114 }
115 
116 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
117   SmallString<64> UrlPath;
118   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
119                     buildIDToString(ID), "debuginfo");
120   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
121 }
122 
123 // General fetching function.
124 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
125                                                   StringRef UrlPath) {
126   SmallString<10> CacheDir;
127 
128   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
129   if (!CacheDirOrErr)
130     return CacheDirOrErr.takeError();
131   CacheDir = *CacheDirOrErr;
132 
133   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
134                                      getDefaultDebuginfodUrls(),
135                                      getDefaultDebuginfodTimeout());
136 }
137 
138 namespace {
139 
140 /// A simple handler which streams the returned data to a cache file. The cache
141 /// file is only created if a 200 OK status is observed.
142 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
143   using CreateStreamFn =
144       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
145   CreateStreamFn CreateStream;
146   HTTPClient &Client;
147   std::unique_ptr<CachedFileStream> FileStream;
148 
149 public:
150   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
151       : CreateStream(CreateStream), Client(Client) {}
152   virtual ~StreamedHTTPResponseHandler() = default;
153 
154   Error handleBodyChunk(StringRef BodyChunk) override;
155 };
156 
157 } // namespace
158 
159 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
160   if (!FileStream) {
161     unsigned Code = Client.responseCode();
162     if (Code && Code != 200)
163       return Error::success();
164     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
165         CreateStream();
166     if (!FileStreamOrError)
167       return FileStreamOrError.takeError();
168     FileStream = std::move(*FileStreamOrError);
169   }
170   *FileStream->OS << BodyChunk;
171   return Error::success();
172 }
173 
174 // An over-accepting simplification of the HTTP RFC 7230 spec.
175 static bool isHeader(StringRef S) {
176   StringRef Name;
177   StringRef Value;
178   std::tie(Name, Value) = S.split(':');
179   if (Name.empty() || Value.empty())
180     return false;
181   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
182          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
183 }
184 
185 static SmallVector<std::string, 0> getHeaders() {
186   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
187   if (!Filename)
188     return {};
189   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
190       MemoryBuffer::getFile(Filename, /*IsText=*/true);
191   if (!HeadersFile)
192     return {};
193 
194   SmallVector<std::string, 0> Headers;
195   uint64_t LineNumber = 0;
196   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
197     LineNumber++;
198     if (!Line.empty() && Line.back() == '\r')
199       Line = Line.drop_back();
200     if (!isHeader(Line)) {
201       if (!all_of(Line, llvm::isSpace))
202         WithColor::warning()
203             << "could not parse debuginfod header: " << Filename << ':'
204             << LineNumber << '\n';
205       continue;
206     }
207     Headers.emplace_back(Line);
208   }
209   return Headers;
210 }
211 
212 Expected<std::string> getCachedOrDownloadArtifact(
213     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
214     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
215   SmallString<64> AbsCachedArtifactPath;
216   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
217                     "llvmcache-" + UniqueKey);
218 
219   Expected<FileCache> CacheOrErr =
220       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
221   if (!CacheOrErr)
222     return CacheOrErr.takeError();
223 
224   FileCache Cache = *CacheOrErr;
225   // We choose an arbitrary Task parameter as we do not make use of it.
226   unsigned Task = 0;
227   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
228   if (!CacheAddStreamOrErr)
229     return CacheAddStreamOrErr.takeError();
230   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
231   if (!CacheAddStream)
232     return std::string(AbsCachedArtifactPath);
233   // The artifact was not found in the local cache, query the debuginfod
234   // servers.
235   if (!HTTPClient::isAvailable())
236     return createStringError(errc::io_error,
237                              "No working HTTP client is available.");
238 
239   if (!HTTPClient::IsInitialized)
240     return createStringError(
241         errc::io_error,
242         "A working HTTP client is available, but it is not initialized. To "
243         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
244         "at the beginning of main.");
245 
246   HTTPClient Client;
247   Client.setTimeout(Timeout);
248   for (StringRef ServerUrl : DebuginfodUrls) {
249     SmallString<64> ArtifactUrl;
250     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
251 
252     // Perform the HTTP request and if successful, write the response body to
253     // the cache.
254     {
255       StreamedHTTPResponseHandler Handler(
256           [&]() { return CacheAddStream(Task, ""); }, Client);
257       HTTPRequest Request(ArtifactUrl);
258       Request.Headers = getHeaders();
259       Error Err = Client.perform(Request, Handler);
260       if (Err)
261         return std::move(Err);
262 
263       unsigned Code = Client.responseCode();
264       if (Code && Code != 200)
265         continue;
266     }
267 
268     Expected<CachePruningPolicy> PruningPolicyOrErr =
269         parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
270     if (!PruningPolicyOrErr)
271       return PruningPolicyOrErr.takeError();
272     pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
273 
274     // Return the path to the artifact on disk.
275     return std::string(AbsCachedArtifactPath);
276   }
277 
278   return createStringError(errc::argument_out_of_domain, "build id not found");
279 }
280 
281 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
282     : Message(Message.str()) {}
283 
284 void DebuginfodLog::push(const Twine &Message) {
285   push(DebuginfodLogEntry(Message));
286 }
287 
288 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
289   {
290     std::lock_guard<std::mutex> Guard(QueueMutex);
291     LogEntryQueue.push(Entry);
292   }
293   QueueCondition.notify_one();
294 }
295 
296 DebuginfodLogEntry DebuginfodLog::pop() {
297   {
298     std::unique_lock<std::mutex> Guard(QueueMutex);
299     // Wait for messages to be pushed into the queue.
300     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
301   }
302   std::lock_guard<std::mutex> Guard(QueueMutex);
303   if (!LogEntryQueue.size())
304     llvm_unreachable("Expected message in the queue.");
305 
306   DebuginfodLogEntry Entry = LogEntryQueue.front();
307   LogEntryQueue.pop();
308   return Entry;
309 }
310 
311 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
312                                            DebuginfodLog &Log, ThreadPool &Pool,
313                                            double MinInterval)
314     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
315   for (StringRef Path : PathsRef)
316     Paths.push_back(Path.str());
317 }
318 
319 Error DebuginfodCollection::update() {
320   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
321   if (UpdateTimer.isRunning())
322     UpdateTimer.stopTimer();
323   UpdateTimer.clear();
324   for (const std::string &Path : Paths) {
325     Log.push("Updating binaries at path " + Path);
326     if (Error Err = findBinaries(Path))
327       return Err;
328   }
329   Log.push("Updated collection");
330   UpdateTimer.startTimer();
331   return Error::success();
332 }
333 
334 Expected<bool> DebuginfodCollection::updateIfStale() {
335   if (!UpdateTimer.isRunning())
336     return false;
337   UpdateTimer.stopTimer();
338   double Time = UpdateTimer.getTotalTime().getWallTime();
339   UpdateTimer.startTimer();
340   if (Time < MinInterval)
341     return false;
342   if (Error Err = update())
343     return std::move(Err);
344   return true;
345 }
346 
347 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
348   while (true) {
349     if (Error Err = update())
350       return Err;
351     std::this_thread::sleep_for(Interval);
352   }
353   llvm_unreachable("updateForever loop should never end");
354 }
355 
356 static bool hasELFMagic(StringRef FilePath) {
357   file_magic Type;
358   std::error_code EC = identify_magic(FilePath, Type);
359   if (EC)
360     return false;
361   switch (Type) {
362   case file_magic::elf:
363   case file_magic::elf_relocatable:
364   case file_magic::elf_executable:
365   case file_magic::elf_shared_object:
366   case file_magic::elf_core:
367     return true;
368   default:
369     return false;
370   }
371 }
372 
373 Error DebuginfodCollection::findBinaries(StringRef Path) {
374   std::error_code EC;
375   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
376   std::mutex IteratorMutex;
377   ThreadPoolTaskGroup IteratorGroup(Pool);
378   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
379        WorkerIndex++) {
380     IteratorGroup.async([&, this]() -> void {
381       std::string FilePath;
382       while (true) {
383         {
384           // Check if iteration is over or there is an error during iteration
385           std::lock_guard<std::mutex> Guard(IteratorMutex);
386           if (I == E || EC)
387             return;
388           // Grab a file path from the directory iterator and advance the
389           // iterator.
390           FilePath = I->path();
391           I.increment(EC);
392         }
393 
394         // Inspect the file at this path to determine if it is debuginfo.
395         if (!hasELFMagic(FilePath))
396           continue;
397 
398         Expected<object::OwningBinary<object::Binary>> BinOrErr =
399             object::createBinary(FilePath);
400 
401         if (!BinOrErr) {
402           consumeError(BinOrErr.takeError());
403           continue;
404         }
405         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
406         if (!Bin->isObject())
407           continue;
408 
409         // TODO: Support non-ELF binaries
410         object::ELFObjectFileBase *Object =
411             dyn_cast<object::ELFObjectFileBase>(Bin);
412         if (!Object)
413           continue;
414 
415         BuildIDRef ID = getBuildID(Object);
416         if (ID.empty())
417           continue;
418 
419         std::string IDString = buildIDToString(ID);
420         if (Object->hasDebugInfo()) {
421           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
422           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
423         } else {
424           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
425           (void)Binaries.try_emplace(IDString, std::move(FilePath));
426         }
427       }
428     });
429   }
430   IteratorGroup.wait();
431   std::unique_lock<std::mutex> Guard(IteratorMutex);
432   if (EC)
433     return errorCodeToError(EC);
434   return Error::success();
435 }
436 
437 Expected<std::optional<std::string>>
438 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
439   Log.push("getting binary path of ID " + buildIDToString(ID));
440   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
441   auto Loc = Binaries.find(buildIDToString(ID));
442   if (Loc != Binaries.end()) {
443     std::string Path = Loc->getValue();
444     return Path;
445   }
446   return std::nullopt;
447 }
448 
449 Expected<std::optional<std::string>>
450 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
451   Log.push("getting debug binary path of ID " + buildIDToString(ID));
452   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
453   auto Loc = DebugBinaries.find(buildIDToString(ID));
454   if (Loc != DebugBinaries.end()) {
455     std::string Path = Loc->getValue();
456     return Path;
457   }
458   return std::nullopt;
459 }
460 
461 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
462   {
463     // Check collection; perform on-demand update if stale.
464     Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
465     if (!PathOrErr)
466       return PathOrErr.takeError();
467     std::optional<std::string> Path = *PathOrErr;
468     if (!Path) {
469       Expected<bool> UpdatedOrErr = updateIfStale();
470       if (!UpdatedOrErr)
471         return UpdatedOrErr.takeError();
472       if (*UpdatedOrErr) {
473         // Try once more.
474         PathOrErr = getBinaryPath(ID);
475         if (!PathOrErr)
476           return PathOrErr.takeError();
477         Path = *PathOrErr;
478       }
479     }
480     if (Path)
481       return *Path;
482   }
483 
484   // Try federation.
485   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
486   if (!PathOrErr)
487     consumeError(PathOrErr.takeError());
488 
489   // Fall back to debug binary.
490   return findDebugBinaryPath(ID);
491 }
492 
493 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
494   // Check collection; perform on-demand update if stale.
495   Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
496   if (!PathOrErr)
497     return PathOrErr.takeError();
498   std::optional<std::string> Path = *PathOrErr;
499   if (!Path) {
500     Expected<bool> UpdatedOrErr = updateIfStale();
501     if (!UpdatedOrErr)
502       return UpdatedOrErr.takeError();
503     if (*UpdatedOrErr) {
504       // Try once more.
505       PathOrErr = getBinaryPath(ID);
506       if (!PathOrErr)
507         return PathOrErr.takeError();
508       Path = *PathOrErr;
509     }
510   }
511   if (Path)
512     return *Path;
513 
514   // Try federation.
515   return getCachedOrDownloadDebuginfo(ID);
516 }
517 
518 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
519                                    DebuginfodCollection &Collection)
520     : Log(Log), Collection(Collection) {
521   cantFail(
522       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
523         Log.push("GET " + Request.UrlPath);
524         std::string IDString;
525         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
526           Request.setResponse(
527               {404, "text/plain", "Build ID is not a hex string\n"});
528           return;
529         }
530         object::BuildID ID(IDString.begin(), IDString.end());
531         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
532         if (Error Err = PathOrErr.takeError()) {
533           consumeError(std::move(Err));
534           Request.setResponse({404, "text/plain", "Build ID not found\n"});
535           return;
536         }
537         streamFile(Request, *PathOrErr);
538       }));
539   cantFail(
540       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
541         Log.push("GET " + Request.UrlPath);
542         std::string IDString;
543         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
544           Request.setResponse(
545               {404, "text/plain", "Build ID is not a hex string\n"});
546           return;
547         }
548         object::BuildID ID(IDString.begin(), IDString.end());
549         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
550         if (Error Err = PathOrErr.takeError()) {
551           consumeError(std::move(Err));
552           Request.setResponse({404, "text/plain", "Build ID not found\n"});
553           return;
554         }
555         streamFile(Request, *PathOrErr);
556       }));
557 }
558 
559 } // namespace llvm
560