1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownload* family of
/// functions. For the server, this file defines the DebuginfodLogEntry and
/// DebuginfodServer structs, as well as the DebuginfodLog and
/// DebuginfodCollection classes. The client functions retrieve any of the
/// three supported artifact types (executable, debuginfo, source file)
/// associated with a build-id from debuginfod servers. If a source file is to
/// be fetched, its absolute path must be passed as the SourceFilePath argument
/// of getCachedOrDownloadSource. The DebuginfodLogEntry, DebuginfodLog, and
/// DebuginfodCollection are used by the DebuginfodServer to scan the local
/// filesystem for binaries and serve the debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42
43 #include <atomic>
44 #include <thread>
45
46 namespace llvm {
47
48 using llvm::object::BuildIDRef;
49
uniqueKey(llvm::StringRef S)50 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
51
52 // Returns a binary BuildID as a normalized hex string.
53 // Uses lowercase for compatibility with common debuginfod servers.
buildIDToString(BuildIDRef ID)54 static std::string buildIDToString(BuildIDRef ID) {
55 return llvm::toHex(ID, /*LowerCase=*/true);
56 }
57
canUseDebuginfod()58 bool canUseDebuginfod() {
59 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
60 }
61
getDefaultDebuginfodUrls()62 SmallVector<StringRef> getDefaultDebuginfodUrls() {
63 const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
64 if (DebuginfodUrlsEnv == nullptr)
65 return SmallVector<StringRef>();
66
67 SmallVector<StringRef> DebuginfodUrls;
68 StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
69 return DebuginfodUrls;
70 }
71
72 /// Finds a default local file caching directory for the debuginfod client,
73 /// first checking DEBUGINFOD_CACHE_PATH.
getDefaultDebuginfodCacheDirectory()74 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
75 if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
76 return CacheDirectoryEnv;
77
78 SmallString<64> CacheDirectory;
79 if (!sys::path::cache_directory(CacheDirectory))
80 return createStringError(
81 errc::io_error, "Unable to determine appropriate cache directory.");
82 sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
83 return std::string(CacheDirectory);
84 }
85
getDefaultDebuginfodTimeout()86 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
87 long Timeout;
88 const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
89 if (DebuginfodTimeoutEnv &&
90 to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
91 return std::chrono::milliseconds(Timeout * 1000);
92
93 return std::chrono::milliseconds(90 * 1000);
94 }
95
96 /// The following functions fetch a debuginfod artifact to a file in a local
97 /// cache and return the cached file path. They first search the local cache,
98 /// followed by the debuginfod servers.
99
getCachedOrDownloadSource(BuildIDRef ID,StringRef SourceFilePath)100 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
101 StringRef SourceFilePath) {
102 SmallString<64> UrlPath;
103 sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
104 buildIDToString(ID), "source",
105 sys::path::convert_to_slash(SourceFilePath));
106 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
107 }
108
getCachedOrDownloadExecutable(BuildIDRef ID)109 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
110 SmallString<64> UrlPath;
111 sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
112 buildIDToString(ID), "executable");
113 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
114 }
115
getCachedOrDownloadDebuginfo(BuildIDRef ID)116 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
117 SmallString<64> UrlPath;
118 sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
119 buildIDToString(ID), "debuginfo");
120 return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
121 }
122
123 // General fetching function.
getCachedOrDownloadArtifact(StringRef UniqueKey,StringRef UrlPath)124 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
125 StringRef UrlPath) {
126 SmallString<10> CacheDir;
127
128 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
129 if (!CacheDirOrErr)
130 return CacheDirOrErr.takeError();
131 CacheDir = *CacheDirOrErr;
132
133 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
134 getDefaultDebuginfodUrls(),
135 getDefaultDebuginfodTimeout());
136 }
137
138 namespace {
139
140 /// A simple handler which streams the returned data to a cache file. The cache
141 /// file is only created if a 200 OK status is observed.
142 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
143 using CreateStreamFn =
144 std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
145 CreateStreamFn CreateStream;
146 HTTPClient &Client;
147 std::unique_ptr<CachedFileStream> FileStream;
148
149 public:
StreamedHTTPResponseHandler(CreateStreamFn CreateStream,HTTPClient & Client)150 StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
151 : CreateStream(CreateStream), Client(Client) {}
152 virtual ~StreamedHTTPResponseHandler() = default;
153
154 Error handleBodyChunk(StringRef BodyChunk) override;
155 };
156
157 } // namespace
158
handleBodyChunk(StringRef BodyChunk)159 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
160 if (!FileStream) {
161 unsigned Code = Client.responseCode();
162 if (Code && Code != 200)
163 return Error::success();
164 Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
165 CreateStream();
166 if (!FileStreamOrError)
167 return FileStreamOrError.takeError();
168 FileStream = std::move(*FileStreamOrError);
169 }
170 *FileStream->OS << BodyChunk;
171 return Error::success();
172 }
173
174 // An over-accepting simplification of the HTTP RFC 7230 spec.
isHeader(StringRef S)175 static bool isHeader(StringRef S) {
176 StringRef Name;
177 StringRef Value;
178 std::tie(Name, Value) = S.split(':');
179 if (Name.empty() || Value.empty())
180 return false;
181 return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
182 all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
183 }
184
getHeaders()185 static SmallVector<std::string, 0> getHeaders() {
186 const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
187 if (!Filename)
188 return {};
189 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
190 MemoryBuffer::getFile(Filename, /*IsText=*/true);
191 if (!HeadersFile)
192 return {};
193
194 SmallVector<std::string, 0> Headers;
195 uint64_t LineNumber = 0;
196 for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
197 LineNumber++;
198 if (!Line.empty() && Line.back() == '\r')
199 Line = Line.drop_back();
200 if (!isHeader(Line)) {
201 if (!all_of(Line, llvm::isSpace))
202 WithColor::warning()
203 << "could not parse debuginfod header: " << Filename << ':'
204 << LineNumber << '\n';
205 continue;
206 }
207 Headers.emplace_back(Line);
208 }
209 return Headers;
210 }
211
getCachedOrDownloadArtifact(StringRef UniqueKey,StringRef UrlPath,StringRef CacheDirectoryPath,ArrayRef<StringRef> DebuginfodUrls,std::chrono::milliseconds Timeout)212 Expected<std::string> getCachedOrDownloadArtifact(
213 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
214 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
215 SmallString<64> AbsCachedArtifactPath;
216 sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
217 "llvmcache-" + UniqueKey);
218
219 Expected<FileCache> CacheOrErr =
220 localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
221 if (!CacheOrErr)
222 return CacheOrErr.takeError();
223
224 FileCache Cache = *CacheOrErr;
225 // We choose an arbitrary Task parameter as we do not make use of it.
226 unsigned Task = 0;
227 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
228 if (!CacheAddStreamOrErr)
229 return CacheAddStreamOrErr.takeError();
230 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
231 if (!CacheAddStream)
232 return std::string(AbsCachedArtifactPath);
233 // The artifact was not found in the local cache, query the debuginfod
234 // servers.
235 if (!HTTPClient::isAvailable())
236 return createStringError(errc::io_error,
237 "No working HTTP client is available.");
238
239 if (!HTTPClient::IsInitialized)
240 return createStringError(
241 errc::io_error,
242 "A working HTTP client is available, but it is not initialized. To "
243 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
244 "at the beginning of main.");
245
246 HTTPClient Client;
247 Client.setTimeout(Timeout);
248 for (StringRef ServerUrl : DebuginfodUrls) {
249 SmallString<64> ArtifactUrl;
250 sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
251
252 // Perform the HTTP request and if successful, write the response body to
253 // the cache.
254 StreamedHTTPResponseHandler Handler(
255 [&]() { return CacheAddStream(Task, ""); }, Client);
256 HTTPRequest Request(ArtifactUrl);
257 Request.Headers = getHeaders();
258 Error Err = Client.perform(Request, Handler);
259 if (Err)
260 return std::move(Err);
261
262 unsigned Code = Client.responseCode();
263 if (Code && Code != 200)
264 continue;
265
266 // Return the path to the artifact on disk.
267 return std::string(AbsCachedArtifactPath);
268 }
269
270 return createStringError(errc::argument_out_of_domain, "build id not found");
271 }
272
DebuginfodLogEntry(const Twine & Message)273 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
274 : Message(Message.str()) {}
275
push(const Twine & Message)276 void DebuginfodLog::push(const Twine &Message) {
277 push(DebuginfodLogEntry(Message));
278 }
279
push(DebuginfodLogEntry Entry)280 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
281 {
282 std::lock_guard<std::mutex> Guard(QueueMutex);
283 LogEntryQueue.push(Entry);
284 }
285 QueueCondition.notify_one();
286 }
287
pop()288 DebuginfodLogEntry DebuginfodLog::pop() {
289 {
290 std::unique_lock<std::mutex> Guard(QueueMutex);
291 // Wait for messages to be pushed into the queue.
292 QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
293 }
294 std::lock_guard<std::mutex> Guard(QueueMutex);
295 if (!LogEntryQueue.size())
296 llvm_unreachable("Expected message in the queue.");
297
298 DebuginfodLogEntry Entry = LogEntryQueue.front();
299 LogEntryQueue.pop();
300 return Entry;
301 }
302
DebuginfodCollection(ArrayRef<StringRef> PathsRef,DebuginfodLog & Log,ThreadPool & Pool,double MinInterval)303 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
304 DebuginfodLog &Log, ThreadPool &Pool,
305 double MinInterval)
306 : Log(Log), Pool(Pool), MinInterval(MinInterval) {
307 for (StringRef Path : PathsRef)
308 Paths.push_back(Path.str());
309 }
310
update()311 Error DebuginfodCollection::update() {
312 std::lock_guard<sys::Mutex> Guard(UpdateMutex);
313 if (UpdateTimer.isRunning())
314 UpdateTimer.stopTimer();
315 UpdateTimer.clear();
316 for (const std::string &Path : Paths) {
317 Log.push("Updating binaries at path " + Path);
318 if (Error Err = findBinaries(Path))
319 return Err;
320 }
321 Log.push("Updated collection");
322 UpdateTimer.startTimer();
323 return Error::success();
324 }
325
updateIfStale()326 Expected<bool> DebuginfodCollection::updateIfStale() {
327 if (!UpdateTimer.isRunning())
328 return false;
329 UpdateTimer.stopTimer();
330 double Time = UpdateTimer.getTotalTime().getWallTime();
331 UpdateTimer.startTimer();
332 if (Time < MinInterval)
333 return false;
334 if (Error Err = update())
335 return std::move(Err);
336 return true;
337 }
338
updateForever(std::chrono::milliseconds Interval)339 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
340 while (true) {
341 if (Error Err = update())
342 return Err;
343 std::this_thread::sleep_for(Interval);
344 }
345 llvm_unreachable("updateForever loop should never end");
346 }
347
hasELFMagic(StringRef FilePath)348 static bool hasELFMagic(StringRef FilePath) {
349 file_magic Type;
350 std::error_code EC = identify_magic(FilePath, Type);
351 if (EC)
352 return false;
353 switch (Type) {
354 case file_magic::elf:
355 case file_magic::elf_relocatable:
356 case file_magic::elf_executable:
357 case file_magic::elf_shared_object:
358 case file_magic::elf_core:
359 return true;
360 default:
361 return false;
362 }
363 }
364
findBinaries(StringRef Path)365 Error DebuginfodCollection::findBinaries(StringRef Path) {
366 std::error_code EC;
367 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
368 std::mutex IteratorMutex;
369 ThreadPoolTaskGroup IteratorGroup(Pool);
370 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
371 WorkerIndex++) {
372 IteratorGroup.async([&, this]() -> void {
373 std::string FilePath;
374 while (true) {
375 {
376 // Check if iteration is over or there is an error during iteration
377 std::lock_guard<std::mutex> Guard(IteratorMutex);
378 if (I == E || EC)
379 return;
380 // Grab a file path from the directory iterator and advance the
381 // iterator.
382 FilePath = I->path();
383 I.increment(EC);
384 }
385
386 // Inspect the file at this path to determine if it is debuginfo.
387 if (!hasELFMagic(FilePath))
388 continue;
389
390 Expected<object::OwningBinary<object::Binary>> BinOrErr =
391 object::createBinary(FilePath);
392
393 if (!BinOrErr) {
394 consumeError(BinOrErr.takeError());
395 continue;
396 }
397 object::Binary *Bin = std::move(BinOrErr.get().getBinary());
398 if (!Bin->isObject())
399 continue;
400
401 // TODO: Support non-ELF binaries
402 object::ELFObjectFileBase *Object =
403 dyn_cast<object::ELFObjectFileBase>(Bin);
404 if (!Object)
405 continue;
406
407 std::optional<BuildIDRef> ID = getBuildID(Object);
408 if (!ID)
409 continue;
410
411 std::string IDString = buildIDToString(*ID);
412 if (Object->hasDebugInfo()) {
413 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
414 (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
415 } else {
416 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
417 (void)Binaries.try_emplace(IDString, std::move(FilePath));
418 }
419 }
420 });
421 }
422 IteratorGroup.wait();
423 std::unique_lock<std::mutex> Guard(IteratorMutex);
424 if (EC)
425 return errorCodeToError(EC);
426 return Error::success();
427 }
428
429 Expected<std::optional<std::string>>
getBinaryPath(BuildIDRef ID)430 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
431 Log.push("getting binary path of ID " + buildIDToString(ID));
432 std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
433 auto Loc = Binaries.find(buildIDToString(ID));
434 if (Loc != Binaries.end()) {
435 std::string Path = Loc->getValue();
436 return Path;
437 }
438 return std::nullopt;
439 }
440
441 Expected<std::optional<std::string>>
getDebugBinaryPath(BuildIDRef ID)442 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
443 Log.push("getting debug binary path of ID " + buildIDToString(ID));
444 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
445 auto Loc = DebugBinaries.find(buildIDToString(ID));
446 if (Loc != DebugBinaries.end()) {
447 std::string Path = Loc->getValue();
448 return Path;
449 }
450 return std::nullopt;
451 }
452
findBinaryPath(BuildIDRef ID)453 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
454 {
455 // Check collection; perform on-demand update if stale.
456 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
457 if (!PathOrErr)
458 return PathOrErr.takeError();
459 std::optional<std::string> Path = *PathOrErr;
460 if (!Path) {
461 Expected<bool> UpdatedOrErr = updateIfStale();
462 if (!UpdatedOrErr)
463 return UpdatedOrErr.takeError();
464 if (*UpdatedOrErr) {
465 // Try once more.
466 PathOrErr = getBinaryPath(ID);
467 if (!PathOrErr)
468 return PathOrErr.takeError();
469 Path = *PathOrErr;
470 }
471 }
472 if (Path)
473 return *Path;
474 }
475
476 // Try federation.
477 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
478 if (!PathOrErr)
479 consumeError(PathOrErr.takeError());
480
481 // Fall back to debug binary.
482 return findDebugBinaryPath(ID);
483 }
484
findDebugBinaryPath(BuildIDRef ID)485 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
486 // Check collection; perform on-demand update if stale.
487 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
488 if (!PathOrErr)
489 return PathOrErr.takeError();
490 std::optional<std::string> Path = *PathOrErr;
491 if (!Path) {
492 Expected<bool> UpdatedOrErr = updateIfStale();
493 if (!UpdatedOrErr)
494 return UpdatedOrErr.takeError();
495 if (*UpdatedOrErr) {
496 // Try once more.
497 PathOrErr = getBinaryPath(ID);
498 if (!PathOrErr)
499 return PathOrErr.takeError();
500 Path = *PathOrErr;
501 }
502 }
503 if (Path)
504 return *Path;
505
506 // Try federation.
507 return getCachedOrDownloadDebuginfo(ID);
508 }
509
DebuginfodServer(DebuginfodLog & Log,DebuginfodCollection & Collection)510 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
511 DebuginfodCollection &Collection)
512 : Log(Log), Collection(Collection) {
513 cantFail(
514 Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
515 Log.push("GET " + Request.UrlPath);
516 std::string IDString;
517 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
518 Request.setResponse(
519 {404, "text/plain", "Build ID is not a hex string\n"});
520 return;
521 }
522 object::BuildID ID(IDString.begin(), IDString.end());
523 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
524 if (Error Err = PathOrErr.takeError()) {
525 consumeError(std::move(Err));
526 Request.setResponse({404, "text/plain", "Build ID not found\n"});
527 return;
528 }
529 streamFile(Request, *PathOrErr);
530 }));
531 cantFail(
532 Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
533 Log.push("GET " + Request.UrlPath);
534 std::string IDString;
535 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
536 Request.setResponse(
537 {404, "text/plain", "Build ID is not a hex string\n"});
538 return;
539 }
540 object::BuildID ID(IDString.begin(), IDString.end());
541 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
542 if (Error Err = PathOrErr.takeError()) {
543 consumeError(std::move(Err));
544 Request.setResponse({404, "text/plain", "Build ID not found\n"});
545 return;
546 }
547 streamFile(Request, *PathOrErr);
548 }));
549 }
550
551 } // namespace llvm
552