1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation for LLVM symbolization library.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
14 
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/DebugInfo/BTF/BTFContext.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/PDB/PDB.h"
19 #include "llvm/DebugInfo/PDB/PDBContext.h"
20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
21 #include "llvm/Demangle/Demangle.h"
22 #include "llvm/Object/BuildID.h"
23 #include "llvm/Object/COFF.h"
24 #include "llvm/Object/ELFObjectFile.h"
25 #include "llvm/Object/MachO.h"
26 #include "llvm/Object/MachOUniversal.h"
27 #include "llvm/Support/CRC.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/DataExtractor.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/Path.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstring>
37 
38 namespace llvm {
39 namespace codeview {
40 union DebugInfo;
41 }
42 namespace symbolize {
43 
44 LLVMSymbolizer::LLVMSymbolizer() = default;
45 
LLVMSymbolizer(const Options & Opts)46 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts)
47     : Opts(Opts),
48       BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {}
49 
50 LLVMSymbolizer::~LLVMSymbolizer() = default;
51 
52 template <typename T>
53 Expected<DILineInfo>
symbolizeCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)54 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
55                                     object::SectionedAddress ModuleOffset) {
56 
57   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
58   if (!InfoOrErr)
59     return InfoOrErr.takeError();
60 
61   SymbolizableModule *Info = *InfoOrErr;
62 
63   // A null module means an error has already been reported. Return an empty
64   // result.
65   if (!Info)
66     return DILineInfo();
67 
68   // If the user is giving us relative addresses, add the preferred base of the
69   // object to the offset before we do the query. It's what DIContext expects.
70   if (Opts.RelativeAddresses)
71     ModuleOffset.Address += Info->getModulePreferredBase();
72 
73   DILineInfo LineInfo = Info->symbolizeCode(
74       ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
75       Opts.UseSymbolTable);
76   if (Opts.Demangle)
77     LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
78   return LineInfo;
79 }
80 
81 Expected<DILineInfo>
symbolizeCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)82 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
83                               object::SectionedAddress ModuleOffset) {
84   return symbolizeCodeCommon(Obj, ModuleOffset);
85 }
86 
87 Expected<DILineInfo>
symbolizeCode(const std::string & ModuleName,object::SectionedAddress ModuleOffset)88 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
89                               object::SectionedAddress ModuleOffset) {
90   return symbolizeCodeCommon(ModuleName, ModuleOffset);
91 }
92 
93 Expected<DILineInfo>
symbolizeCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)94 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
95                               object::SectionedAddress ModuleOffset) {
96   return symbolizeCodeCommon(BuildID, ModuleOffset);
97 }
98 
99 template <typename T>
symbolizeInlinedCodeCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)100 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
101     const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
102   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
103   if (!InfoOrErr)
104     return InfoOrErr.takeError();
105 
106   SymbolizableModule *Info = *InfoOrErr;
107 
108   // A null module means an error has already been reported. Return an empty
109   // result.
110   if (!Info)
111     return DIInliningInfo();
112 
113   // If the user is giving us relative addresses, add the preferred base of the
114   // object to the offset before we do the query. It's what DIContext expects.
115   if (Opts.RelativeAddresses)
116     ModuleOffset.Address += Info->getModulePreferredBase();
117 
118   DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
119       ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
120       Opts.UseSymbolTable);
121   if (Opts.Demangle) {
122     for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
123       auto *Frame = InlinedContext.getMutableFrame(i);
124       Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
125     }
126   }
127   return InlinedContext;
128 }
129 
130 Expected<DIInliningInfo>
symbolizeInlinedCode(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)131 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj,
132                                      object::SectionedAddress ModuleOffset) {
133   return symbolizeInlinedCodeCommon(Obj, ModuleOffset);
134 }
135 
136 Expected<DIInliningInfo>
symbolizeInlinedCode(const std::string & ModuleName,object::SectionedAddress ModuleOffset)137 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
138                                      object::SectionedAddress ModuleOffset) {
139   return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
140 }
141 
142 Expected<DIInliningInfo>
symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)143 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
144                                      object::SectionedAddress ModuleOffset) {
145   return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
146 }
147 
148 template <typename T>
149 Expected<DIGlobal>
symbolizeDataCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)150 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
151                                     object::SectionedAddress ModuleOffset) {
152 
153   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
154   if (!InfoOrErr)
155     return InfoOrErr.takeError();
156 
157   SymbolizableModule *Info = *InfoOrErr;
158   // A null module means an error has already been reported. Return an empty
159   // result.
160   if (!Info)
161     return DIGlobal();
162 
163   // If the user is giving us relative addresses, add the preferred base of
164   // the object to the offset before we do the query. It's what DIContext
165   // expects.
166   if (Opts.RelativeAddresses)
167     ModuleOffset.Address += Info->getModulePreferredBase();
168 
169   DIGlobal Global = Info->symbolizeData(ModuleOffset);
170   if (Opts.Demangle)
171     Global.Name = DemangleName(Global.Name, Info);
172   return Global;
173 }
174 
175 Expected<DIGlobal>
symbolizeData(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)176 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj,
177                               object::SectionedAddress ModuleOffset) {
178   return symbolizeDataCommon(Obj, ModuleOffset);
179 }
180 
181 Expected<DIGlobal>
symbolizeData(const std::string & ModuleName,object::SectionedAddress ModuleOffset)182 LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
183                               object::SectionedAddress ModuleOffset) {
184   return symbolizeDataCommon(ModuleName, ModuleOffset);
185 }
186 
187 Expected<DIGlobal>
symbolizeData(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)188 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
189                               object::SectionedAddress ModuleOffset) {
190   return symbolizeDataCommon(BuildID, ModuleOffset);
191 }
192 
193 template <typename T>
194 Expected<std::vector<DILocal>>
symbolizeFrameCommon(const T & ModuleSpecifier,object::SectionedAddress ModuleOffset)195 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
196                                      object::SectionedAddress ModuleOffset) {
197   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
198   if (!InfoOrErr)
199     return InfoOrErr.takeError();
200 
201   SymbolizableModule *Info = *InfoOrErr;
202   // A null module means an error has already been reported. Return an empty
203   // result.
204   if (!Info)
205     return std::vector<DILocal>();
206 
207   // If the user is giving us relative addresses, add the preferred base of
208   // the object to the offset before we do the query. It's what DIContext
209   // expects.
210   if (Opts.RelativeAddresses)
211     ModuleOffset.Address += Info->getModulePreferredBase();
212 
213   return Info->symbolizeFrame(ModuleOffset);
214 }
215 
216 Expected<std::vector<DILocal>>
symbolizeFrame(const ObjectFile & Obj,object::SectionedAddress ModuleOffset)217 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj,
218                                object::SectionedAddress ModuleOffset) {
219   return symbolizeFrameCommon(Obj, ModuleOffset);
220 }
221 
222 Expected<std::vector<DILocal>>
symbolizeFrame(const std::string & ModuleName,object::SectionedAddress ModuleOffset)223 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
224                                object::SectionedAddress ModuleOffset) {
225   return symbolizeFrameCommon(ModuleName, ModuleOffset);
226 }
227 
228 Expected<std::vector<DILocal>>
symbolizeFrame(ArrayRef<uint8_t> BuildID,object::SectionedAddress ModuleOffset)229 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
230                                object::SectionedAddress ModuleOffset) {
231   return symbolizeFrameCommon(BuildID, ModuleOffset);
232 }
233 
234 template <typename T>
235 Expected<std::vector<DILineInfo>>
findSymbolCommon(const T & ModuleSpecifier,StringRef Symbol,uint64_t Offset)236 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
237                                  uint64_t Offset) {
238   auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
239   if (!InfoOrErr)
240     return InfoOrErr.takeError();
241 
242   SymbolizableModule *Info = *InfoOrErr;
243   std::vector<DILineInfo> Result;
244 
245   // A null module means an error has already been reported. Return an empty
246   // result.
247   if (!Info)
248     return Result;
249 
250   for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
251     DILineInfo LineInfo = Info->symbolizeCode(
252         A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
253         Opts.UseSymbolTable);
254     if (LineInfo.FileName != DILineInfo::BadString) {
255       if (Opts.Demangle)
256         LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
257       Result.push_back(LineInfo);
258     }
259   }
260 
261   return Result;
262 }
263 
264 Expected<std::vector<DILineInfo>>
findSymbol(const ObjectFile & Obj,StringRef Symbol,uint64_t Offset)265 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
266                            uint64_t Offset) {
267   return findSymbolCommon(Obj, Symbol, Offset);
268 }
269 
270 Expected<std::vector<DILineInfo>>
findSymbol(const std::string & ModuleName,StringRef Symbol,uint64_t Offset)271 LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol,
272                            uint64_t Offset) {
273   return findSymbolCommon(ModuleName, Symbol, Offset);
274 }
275 
276 Expected<std::vector<DILineInfo>>
findSymbol(ArrayRef<uint8_t> BuildID,StringRef Symbol,uint64_t Offset)277 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
278                            uint64_t Offset) {
279   return findSymbolCommon(BuildID, Symbol, Offset);
280 }
281 
flush()282 void LLVMSymbolizer::flush() {
283   ObjectForUBPathAndArch.clear();
284   LRUBinaries.clear();
285   CacheSize = 0;
286   BinaryForPath.clear();
287   ObjectPairForPathArch.clear();
288   Modules.clear();
289   BuildIDPaths.clear();
290 }
291 
292 namespace {
293 
294 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
295 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
296 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
297 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
getDarwinDWARFResourceForPath(const std::string & Path,const std::string & Basename)298 std::string getDarwinDWARFResourceForPath(const std::string &Path,
299                                           const std::string &Basename) {
300   SmallString<16> ResourceName = StringRef(Path);
301   if (sys::path::extension(Path) != ".dSYM") {
302     ResourceName += ".dSYM";
303   }
304   sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
305   sys::path::append(ResourceName, Basename);
306   return std::string(ResourceName);
307 }
308 
checkFileCRC(StringRef Path,uint32_t CRCHash)309 bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
310   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
311       MemoryBuffer::getFileOrSTDIN(Path);
312   if (!MB)
313     return false;
314   return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
315 }
316 
getGNUDebuglinkContents(const ObjectFile * Obj,std::string & DebugName,uint32_t & CRCHash)317 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
318                              uint32_t &CRCHash) {
319   if (!Obj)
320     return false;
321   for (const SectionRef &Section : Obj->sections()) {
322     StringRef Name;
323     consumeError(Section.getName().moveInto(Name));
324 
325     Name = Name.substr(Name.find_first_not_of("._"));
326     if (Name == "gnu_debuglink") {
327       Expected<StringRef> ContentsOrErr = Section.getContents();
328       if (!ContentsOrErr) {
329         consumeError(ContentsOrErr.takeError());
330         return false;
331       }
332       DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
333       uint64_t Offset = 0;
334       if (const char *DebugNameStr = DE.getCStr(&Offset)) {
335         // 4-byte align the offset.
336         Offset = (Offset + 3) & ~0x3;
337         if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
338           DebugName = DebugNameStr;
339           CRCHash = DE.getU32(&Offset);
340           return true;
341         }
342       }
343       break;
344     }
345   }
346   return false;
347 }
348 
darwinDsymMatchesBinary(const MachOObjectFile * DbgObj,const MachOObjectFile * Obj)349 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
350                              const MachOObjectFile *Obj) {
351   ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
352   ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
353   if (dbg_uuid.empty() || bin_uuid.empty())
354     return false;
355   return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
356 }
357 
358 } // end anonymous namespace
359 
lookUpDsymFile(const std::string & ExePath,const MachOObjectFile * MachExeObj,const std::string & ArchName)360 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
361                                            const MachOObjectFile *MachExeObj,
362                                            const std::string &ArchName) {
363   // On Darwin we may find DWARF in separate object file in
364   // resource directory.
365   std::vector<std::string> DsymPaths;
366   StringRef Filename = sys::path::filename(ExePath);
367   DsymPaths.push_back(
368       getDarwinDWARFResourceForPath(ExePath, std::string(Filename)));
369   for (const auto &Path : Opts.DsymHints) {
370     DsymPaths.push_back(
371         getDarwinDWARFResourceForPath(Path, std::string(Filename)));
372   }
373   for (const auto &Path : DsymPaths) {
374     auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
375     if (!DbgObjOrErr) {
376       // Ignore errors, the file might not exist.
377       consumeError(DbgObjOrErr.takeError());
378       continue;
379     }
380     ObjectFile *DbgObj = DbgObjOrErr.get();
381     if (!DbgObj)
382       continue;
383     const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
384     if (!MachDbgObj)
385       continue;
386     if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
387       return DbgObj;
388   }
389   return nullptr;
390 }
391 
// Finds the separate debug object referenced by Obj's gnu_debuglink section.
//
// Returns nullptr when Obj has no usable debuglink, when no file with a
// matching CRC is found, or when the located file cannot be loaded.
ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
                                                  const ObjectFile *Obj,
                                                  const std::string &ArchName) {
  std::string DebuglinkName;
  uint32_t CRCHash;
  if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
    return nullptr;

  std::string DebugBinaryPath;
  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
    return nullptr;

  Expected<ObjectFile *> DbgObjOrErr =
      getOrCreateObject(DebugBinaryPath, ArchName);
  if (DbgObjOrErr)
    return *DbgObjOrErr;

  // Ignore errors, the file might not exist.
  consumeError(DbgObjOrErr.takeError());
  return nullptr;
}
410 
lookUpBuildIDObject(const std::string & Path,const ELFObjectFileBase * Obj,const std::string & ArchName)411 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
412                                                 const ELFObjectFileBase *Obj,
413                                                 const std::string &ArchName) {
414   auto BuildID = getBuildID(Obj);
415   if (BuildID.size() < 2)
416     return nullptr;
417   std::string DebugBinaryPath;
418   if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
419     return nullptr;
420   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
421   if (!DbgObjOrErr) {
422     consumeError(DbgObjOrErr.takeError());
423     return nullptr;
424   }
425   return DbgObjOrErr.get();
426 }
427 
findDebugBinary(const std::string & OrigPath,const std::string & DebuglinkName,uint32_t CRCHash,std::string & Result)428 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
429                                      const std::string &DebuglinkName,
430                                      uint32_t CRCHash, std::string &Result) {
431   SmallString<16> OrigDir(OrigPath);
432   llvm::sys::path::remove_filename(OrigDir);
433   SmallString<16> DebugPath = OrigDir;
434   // Try relative/path/to/original_binary/debuglink_name
435   llvm::sys::path::append(DebugPath, DebuglinkName);
436   if (checkFileCRC(DebugPath, CRCHash)) {
437     Result = std::string(DebugPath);
438     return true;
439   }
440   // Try relative/path/to/original_binary/.debug/debuglink_name
441   DebugPath = OrigDir;
442   llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
443   if (checkFileCRC(DebugPath, CRCHash)) {
444     Result = std::string(DebugPath);
445     return true;
446   }
447   // Make the path absolute so that lookups will go to
448   // "/usr/lib/debug/full/path/to/debug", not
449   // "/usr/lib/debug/to/debug"
450   llvm::sys::fs::make_absolute(OrigDir);
451   if (!Opts.FallbackDebugPath.empty()) {
452     // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
453     DebugPath = Opts.FallbackDebugPath;
454   } else {
455 #if defined(__NetBSD__)
456     // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
457     DebugPath = "/usr/libdata/debug";
458 #else
459     // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
460     DebugPath = "/usr/lib/debug";
461 #endif
462   }
463   llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
464                           DebuglinkName);
465   if (checkFileCRC(DebugPath, CRCHash)) {
466     Result = std::string(DebugPath);
467     return true;
468   }
469   return false;
470 }
471 
getBuildIDStr(ArrayRef<uint8_t> BuildID)472 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
473   return StringRef(reinterpret_cast<const char *>(BuildID.data()),
474                    BuildID.size());
475 }
476 
getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,std::string & Result)477 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
478                                           std::string &Result) {
479   StringRef BuildIDStr = getBuildIDStr(BuildID);
480   auto I = BuildIDPaths.find(BuildIDStr);
481   if (I != BuildIDPaths.end()) {
482     Result = I->second;
483     return true;
484   }
485   if (!BIDFetcher)
486     return false;
487   if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) {
488     Result = *Path;
489     auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
490     assert(InsertResult.second);
491     (void)InsertResult;
492     return true;
493   }
494 
495   return false;
496 }
497 
498 Expected<LLVMSymbolizer::ObjectPair>
getOrCreateObjectPair(const std::string & Path,const std::string & ArchName)499 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
500                                       const std::string &ArchName) {
501   auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
502   if (I != ObjectPairForPathArch.end()) {
503     recordAccess(BinaryForPath.find(Path)->second);
504     return I->second;
505   }
506 
507   auto ObjOrErr = getOrCreateObject(Path, ArchName);
508   if (!ObjOrErr) {
509     ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
510                                   ObjectPair(nullptr, nullptr));
511     return ObjOrErr.takeError();
512   }
513 
514   ObjectFile *Obj = ObjOrErr.get();
515   assert(Obj != nullptr);
516   ObjectFile *DbgObj = nullptr;
517 
518   if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
519     DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
520   else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
521     DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
522   if (!DbgObj)
523     DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
524   if (!DbgObj)
525     DbgObj = Obj;
526   ObjectPair Res = std::make_pair(Obj, DbgObj);
527   std::string DbgObjPath = DbgObj->getFileName().str();
528   auto Pair =
529       ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
530   BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
531     ObjectPairForPathArch.erase(I);
532   });
533   return Res;
534 }
535 
536 Expected<ObjectFile *>
getOrCreateObject(const std::string & Path,const std::string & ArchName)537 LLVMSymbolizer::getOrCreateObject(const std::string &Path,
538                                   const std::string &ArchName) {
539   Binary *Bin;
540   auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
541   if (!Pair.second) {
542     Bin = Pair.first->second->getBinary();
543     recordAccess(Pair.first->second);
544   } else {
545     Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
546     if (!BinOrErr)
547       return BinOrErr.takeError();
548 
549     CachedBinary &CachedBin = Pair.first->second;
550     CachedBin = std::move(BinOrErr.get());
551     CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
552     LRUBinaries.push_back(CachedBin);
553     CacheSize += CachedBin.size();
554     Bin = CachedBin->getBinary();
555   }
556 
557   if (!Bin)
558     return static_cast<ObjectFile *>(nullptr);
559 
560   if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
561     auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
562     if (I != ObjectForUBPathAndArch.end())
563       return I->second.get();
564 
565     Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
566         UB->getMachOObjectForArch(ArchName);
567     if (!ObjOrErr) {
568       ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
569                                      std::unique_ptr<ObjectFile>());
570       return ObjOrErr.takeError();
571     }
572     ObjectFile *Res = ObjOrErr->get();
573     auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
574                                                std::move(ObjOrErr.get()));
575     BinaryForPath.find(Path)->second.pushEvictor(
576         [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
577     return Res;
578   }
579   if (Bin->isObject()) {
580     return cast<ObjectFile>(Bin);
581   }
582   return errorCodeToError(object_error::arch_not_found);
583 }
584 
585 Expected<SymbolizableModule *>
createModuleInfo(const ObjectFile * Obj,std::unique_ptr<DIContext> Context,StringRef ModuleName)586 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
587                                  std::unique_ptr<DIContext> Context,
588                                  StringRef ModuleName) {
589   auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context),
590                                                   Opts.UntagAddresses);
591   std::unique_ptr<SymbolizableModule> SymMod;
592   if (InfoOrErr)
593     SymMod = std::move(*InfoOrErr);
594   auto InsertResult = Modules.insert(
595       std::make_pair(std::string(ModuleName), std::move(SymMod)));
596   assert(InsertResult.second);
597   if (!InfoOrErr)
598     return InfoOrErr.takeError();
599   return InsertResult.first->second.get();
600 }
601 
602 Expected<SymbolizableModule *>
getOrCreateModuleInfo(const std::string & ModuleName)603 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
604   std::string BinaryName = ModuleName;
605   std::string ArchName = Opts.DefaultArch;
606   size_t ColonPos = ModuleName.find_last_of(':');
607   // Verify that substring after colon form a valid arch name.
608   if (ColonPos != std::string::npos) {
609     std::string ArchStr = ModuleName.substr(ColonPos + 1);
610     if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
611       BinaryName = ModuleName.substr(0, ColonPos);
612       ArchName = ArchStr;
613     }
614   }
615 
616   auto I = Modules.find(ModuleName);
617   if (I != Modules.end()) {
618     recordAccess(BinaryForPath.find(BinaryName)->second);
619     return I->second.get();
620   }
621 
622   auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
623   if (!ObjectsOrErr) {
624     // Failed to find valid object file.
625     Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
626     return ObjectsOrErr.takeError();
627   }
628   ObjectPair Objects = ObjectsOrErr.get();
629 
630   std::unique_ptr<DIContext> Context;
631   // If this is a COFF object containing PDB info, use a PDBContext to
632   // symbolize. Otherwise, use DWARF.
633   if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
634     const codeview::DebugInfo *DebugInfo;
635     StringRef PDBFileName;
636     auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
637     if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) {
638 #if 0
639       using namespace pdb;
640       std::unique_ptr<IPDBSession> Session;
641 
642       PDB_ReaderType ReaderType =
643           Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native;
644       if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(),
645                                     Session)) {
646         Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
647         // Return along the PDB filename to provide more context
648         return createFileError(PDBFileName, std::move(Err));
649       }
650       Context.reset(new PDBContext(*CoffObject, std::move(Session)));
651 #else
652       return make_error<StringError>(
653           "PDB support not compiled in",
654           std::make_error_code(std::errc::not_supported));
655 #endif
656     }
657   }
658   if (!Context)
659     Context = DWARFContext::create(
660         *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
661         nullptr, Opts.DWPName);
662   auto ModuleOrErr =
663       createModuleInfo(Objects.first, std::move(Context), ModuleName);
664   if (ModuleOrErr) {
665     auto I = Modules.find(ModuleName);
666     BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
667       Modules.erase(I);
668     });
669   }
670   return ModuleOrErr;
671 }
672 
673 // For BPF programs .BTF.ext section contains line numbers information,
674 // use it if regular DWARF is not available (e.g. for stripped binary).
useBTFContext(const ObjectFile & Obj)675 static bool useBTFContext(const ObjectFile &Obj) {
676   return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
677          BTFParser::hasBTFSections(Obj);
678 }
679 
680 Expected<SymbolizableModule *>
getOrCreateModuleInfo(const ObjectFile & Obj)681 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
682   StringRef ObjName = Obj.getFileName();
683   auto I = Modules.find(ObjName);
684   if (I != Modules.end())
685     return I->second.get();
686 
687   std::unique_ptr<DIContext> Context;
688   if (useBTFContext(Obj))
689     Context = BTFContext::create(Obj);
690   else
691     Context = DWARFContext::create(Obj);
692   // FIXME: handle COFF object with PDB info to use PDBContext
693   return createModuleInfo(&Obj, std::move(Context), ObjName);
694 }
695 
696 Expected<SymbolizableModule *>
getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID)697 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
698   std::string Path;
699   if (!getOrFindDebugBinary(BuildID, Path)) {
700     return createStringError(errc::no_such_file_or_directory,
701                              "could not find build ID");
702   }
703   return getOrCreateModuleInfo(Path);
704 }
705 
706 namespace {
707 
708 // Undo these various manglings for Win32 extern "C" functions:
709 // cdecl       - _foo
710 // stdcall     - _foo@12
711 // fastcall    - @foo@12
712 // vectorcall  - foo@@12
713 // These are all different linkage names for 'foo'.
demanglePE32ExternCFunc(StringRef SymbolName)714 StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
715   char Front = SymbolName.empty() ? '\0' : SymbolName[0];
716 
717   // Remove any '@[0-9]+' suffix.
718   bool HasAtNumSuffix = false;
719   if (Front != '?') {
720     size_t AtPos = SymbolName.rfind('@');
721     if (AtPos != StringRef::npos &&
722         all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
723       SymbolName = SymbolName.substr(0, AtPos);
724       HasAtNumSuffix = true;
725     }
726   }
727 
728   // Remove any ending '@' for vectorcall.
729   bool IsVectorCall = false;
730   if (HasAtNumSuffix && SymbolName.ends_with("@")) {
731     SymbolName = SymbolName.drop_back();
732     IsVectorCall = true;
733   }
734 
735   // If not vectorcall, remove any '_' or '@' prefix.
736   if (!IsVectorCall && (Front == '_' || Front == '@'))
737     SymbolName = SymbolName.drop_front();
738 
739   return SymbolName;
740 }
741 
742 } // end anonymous namespace
743 
744 std::string
DemangleName(const std::string & Name,const SymbolizableModule * DbiModuleDescriptor)745 LLVMSymbolizer::DemangleName(const std::string &Name,
746                              const SymbolizableModule *DbiModuleDescriptor) {
747   std::string Result;
748   if (nonMicrosoftDemangle(Name, Result))
749     return Result;
750 
751   if (!Name.empty() && Name.front() == '?') {
752     // Only do MSVC C++ demangling on symbols starting with '?'.
753     int status = 0;
754     char *DemangledName = microsoftDemangle(
755         Name, nullptr, &status,
756         MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
757                         MSDF_NoMemberType | MSDF_NoReturnType));
758     if (status != 0)
759       return Name;
760     Result = DemangledName;
761     free(DemangledName);
762     return Result;
763   }
764 
765   if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
766     std::string DemangledCName(demanglePE32ExternCFunc(Name));
767     // On i386 Windows, the C name mangling for different calling conventions
768     // may also be applied on top of the Itanium or Rust name mangling.
769     if (nonMicrosoftDemangle(DemangledCName, Result))
770       return Result;
771     return DemangledCName;
772   }
773   return Name;
774 }
775 
recordAccess(CachedBinary & Bin)776 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
777   if (Bin->getBinary())
778     LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
779 }
780 
pruneCache()781 void LLVMSymbolizer::pruneCache() {
782   // Evict the LRU binary until the max cache size is reached or there's <= 1
783   // item in the cache. The MRU binary is always kept to avoid thrashing if it's
784   // larger than the cache size.
785   while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
786          std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
787     CachedBinary &Bin = LRUBinaries.front();
788     CacheSize -= Bin.size();
789     LRUBinaries.pop_front();
790     Bin.evict();
791   }
792 }
793 
pushEvictor(std::function<void ()> NewEvictor)794 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
795   if (Evictor) {
796     this->Evictor = [OldEvictor = std::move(this->Evictor),
797                      NewEvictor = std::move(NewEvictor)]() {
798       NewEvictor();
799       OldEvictor();
800     };
801   } else {
802     this->Evictor = std::move(NewEvictor);
803   }
804 }
805 
806 } // namespace symbolize
807 } // namespace llvm
808