1 //===- GsymReader.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/GSYM/GsymReader.h"
10 
11 #include <assert.h>
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 
16 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
17 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
18 #include "llvm/DebugInfo/GSYM/LineTable.h"
19 #include "llvm/Support/BinaryStreamReader.h"
20 #include "llvm/Support/DataExtractor.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 
23 using namespace llvm;
24 using namespace gsym;
25 
/// Construct a reader that takes ownership of \p Buffer. The byte order
/// starts out as the host's native order; parse() assigns the final value
/// after it has inspected the magic number.
GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
    : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
28 
// Move construction uses the compiler-generated member-wise move.
GsymReader::GsymReader(GsymReader &&RHS) = default;
30 
// The destructor is defined out of line here with the compiler-generated
// behavior.
GsymReader::~GsymReader() = default;
32 
33 llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
34   // Open the input file and return an appropriate error if needed.
35   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
36       MemoryBuffer::getFileOrSTDIN(Filename);
37   auto Err = BuffOrErr.getError();
38   if (Err)
39     return llvm::errorCodeToError(Err);
40   return create(BuffOrErr.get());
41 }
42 
43 llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
44   auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
45   return create(MemBuffer);
46 }
47 
48 llvm::Expected<llvm::gsym::GsymReader>
49 GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
50   if (!MemBuffer)
51     return createStringError(std::errc::invalid_argument,
52                              "invalid memory buffer");
53   GsymReader GR(std::move(MemBuffer));
54   llvm::Error Err = GR.parse();
55   if (Err)
56     return std::move(Err);
57   return std::move(GR);
58 }
59 
60 llvm::Error
61 GsymReader::parse() {
62   BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
63   // Check for the magic bytes. This file format is designed to be mmap'ed
64   // into a process and accessed as read only. This is done for performance
65   // and efficiency for symbolicating and parsing GSYM data.
66   if (FileData.readObject(Hdr))
67     return createStringError(std::errc::invalid_argument,
68                              "not enough data for a GSYM header");
69 
70   const auto HostByteOrder = llvm::endianness::native;
71   switch (Hdr->Magic) {
72     case GSYM_MAGIC:
73       Endian = HostByteOrder;
74       break;
75     case GSYM_CIGAM:
76       // This is a GSYM file, but not native endianness.
77       Endian = sys::IsBigEndianHost ? llvm::endianness::little
78                                     : llvm::endianness::big;
79       Swap.reset(new SwappedData);
80       break;
81     default:
82       return createStringError(std::errc::invalid_argument,
83                                "not a GSYM file");
84   }
85 
86   bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
87   // Read a correctly byte swapped header if we need to.
88   if (Swap) {
89     DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
90     if (auto ExpectedHdr = Header::decode(Data))
91       Swap->Hdr = ExpectedHdr.get();
92     else
93       return ExpectedHdr.takeError();
94     Hdr = &Swap->Hdr;
95   }
96 
97   // Detect errors in the header and report any that are found. If we make it
98   // past this without errors, we know we have a good magic value, a supported
99   // version number, verified address offset size and a valid UUID size.
100   if (Error Err = Hdr->checkForError())
101     return Err;
102 
103   if (!Swap) {
104     // This is the native endianness case that is most common and optimized for
105     // efficient lookups. Here we just grab pointers to the native data and
106     // use ArrayRef objects to allow efficient read only access.
107 
108     // Read the address offsets.
109     if (FileData.padToAlignment(Hdr->AddrOffSize) ||
110         FileData.readArray(AddrOffsets,
111                            Hdr->NumAddresses * Hdr->AddrOffSize))
112       return createStringError(std::errc::invalid_argument,
113                               "failed to read address table");
114 
115     // Read the address info offsets.
116     if (FileData.padToAlignment(4) ||
117         FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
118       return createStringError(std::errc::invalid_argument,
119                               "failed to read address info offsets table");
120 
121     // Read the file table.
122     uint32_t NumFiles = 0;
123     if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
124       return createStringError(std::errc::invalid_argument,
125                               "failed to read file table");
126 
127     // Get the string table.
128     FileData.setOffset(Hdr->StrtabOffset);
129     if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
130       return createStringError(std::errc::invalid_argument,
131                               "failed to read string table");
132 } else {
133   // This is the non native endianness case that is not common and not
134   // optimized for lookups. Here we decode the important tables into local
135   // storage and then set the ArrayRef objects to point to these swapped
136   // copies of the read only data so lookups can be as efficient as possible.
137   DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
138 
139   // Read the address offsets.
140   uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
141   Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
142   switch (Hdr->AddrOffSize) {
143     case 1:
144       if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
145         return createStringError(std::errc::invalid_argument,
146                                   "failed to read address table");
147       break;
148     case 2:
149       if (!Data.getU16(&Offset,
150                         reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
151                         Hdr->NumAddresses))
152         return createStringError(std::errc::invalid_argument,
153                                   "failed to read address table");
154       break;
155     case 4:
156       if (!Data.getU32(&Offset,
157                         reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
158                         Hdr->NumAddresses))
159         return createStringError(std::errc::invalid_argument,
160                                   "failed to read address table");
161       break;
162     case 8:
163       if (!Data.getU64(&Offset,
164                         reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
165                         Hdr->NumAddresses))
166         return createStringError(std::errc::invalid_argument,
167                                   "failed to read address table");
168     }
169     AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
170 
171     // Read the address info offsets.
172     Offset = alignTo(Offset, 4);
173     Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
174     if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
175       AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
176     else
177       return createStringError(std::errc::invalid_argument,
178                                "failed to read address table");
179     // Read the file table.
180     const uint32_t NumFiles = Data.getU32(&Offset);
181     if (NumFiles > 0) {
182       Swap->Files.resize(NumFiles);
183       if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
184         Files = ArrayRef<FileEntry>(Swap->Files);
185       else
186         return createStringError(std::errc::invalid_argument,
187                                  "failed to read file table");
188     }
189     // Get the string table.
190     StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
191                                                 Hdr->StrtabSize);
192     if (StrTab.Data.empty())
193       return createStringError(std::errc::invalid_argument,
194                                "failed to read string table");
195   }
196   return Error::success();
197 
198 }
199 
/// Return the header that \c Hdr points to. Asserts that \c Hdr is non-null.
const Header &GsymReader::getHeader() const {
  // The only way to get a GsymReader is from GsymReader::openFile(...) or
  // GsymReader::copyBuffer() and the header must be valid and initialized to
  // a valid pointer value, so the assert below should not trigger.
  assert(Hdr);
  return *Hdr;
}
207 
208 std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
209   switch (Hdr->AddrOffSize) {
210   case 1: return addressForIndex<uint8_t>(Index);
211   case 2: return addressForIndex<uint16_t>(Index);
212   case 4: return addressForIndex<uint32_t>(Index);
213   case 8: return addressForIndex<uint64_t>(Index);
214   }
215   return std::nullopt;
216 }
217 
218 std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
219   const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
220   if (Index < NumAddrInfoOffsets)
221     return AddrInfoOffsets[Index];
222   return std::nullopt;
223 }
224 
225 Expected<uint64_t>
226 GsymReader::getAddressIndex(const uint64_t Addr) const {
227   if (Addr >= Hdr->BaseAddress) {
228     const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
229     std::optional<uint64_t> AddrOffsetIndex;
230     switch (Hdr->AddrOffSize) {
231     case 1:
232       AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
233       break;
234     case 2:
235       AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
236       break;
237     case 4:
238       AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
239       break;
240     case 8:
241       AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
242       break;
243     default:
244       return createStringError(std::errc::invalid_argument,
245                                "unsupported address offset size %u",
246                                Hdr->AddrOffSize);
247     }
248     if (AddrOffsetIndex)
249       return *AddrOffsetIndex;
250   }
251   return createStringError(std::errc::invalid_argument,
252                            "address 0x%" PRIx64 " is not in GSYM", Addr);
253 
254 }
255 
256 llvm::Expected<DataExtractor>
257 GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
258                                           uint64_t &FuncStartAddr) const {
259   Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
260   if (!ExpectedAddrIdx)
261     return ExpectedAddrIdx.takeError();
262   const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
263   // The AddrIdx is the first index of the function info entries that match
264   // \a Addr. We need to iterate over all function info objects that start with
265   // the same address until we find a range that contains \a Addr.
266   std::optional<uint64_t> FirstFuncStartAddr;
267   const size_t NumAddresses = getNumAddresses();
268   for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
269     auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
270     // If there was an error, return the error.
271     if (!ExpextedData)
272       return ExpextedData;
273 
274     // Remember the first function start address if it hasn't already been set.
275     // If it is already valid, check to see if it matches the first function
276     // start address and only continue if it matches.
277     if (FirstFuncStartAddr.has_value()) {
278       if (*FirstFuncStartAddr != FuncStartAddr)
279         break; // Done with consecutive function entries with same address.
280     } else {
281       FirstFuncStartAddr = FuncStartAddr;
282     }
283     // Make sure the current function address ranges contains \a Addr.
284     // Some symbols on Darwin don't have valid sizes, so if we run into a
285     // symbol with zero size, then we have found a match for our address.
286 
287     // The first thing the encoding of a FunctionInfo object is the function
288     // size.
289     uint64_t Offset = 0;
290     uint32_t FuncSize = ExpextedData->getU32(&Offset);
291     if (FuncSize == 0 ||
292         AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
293       return ExpextedData;
294   }
295   return createStringError(std::errc::invalid_argument,
296                            "address 0x%" PRIx64 " is not in GSYM", Addr);
297 }
298 
299 llvm::Expected<DataExtractor>
300 GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
301                                        uint64_t &FuncStartAddr) const {
302   if (AddrIdx >= getNumAddresses())
303     return createStringError(std::errc::invalid_argument,
304                              "invalid address index %" PRIu64, AddrIdx);
305   const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
306   assert((Endian == endianness::big || Endian == endianness::little) &&
307          "Endian must be either big or little");
308   StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
309   if (Bytes.empty())
310     return createStringError(std::errc::invalid_argument,
311                              "invalid address info offset 0x%" PRIx32,
312                              AddrInfoOffset);
313   std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
314   if (!OptFuncStartAddr)
315     return createStringError(std::errc::invalid_argument,
316                              "failed to extract address[%" PRIu64 "]", AddrIdx);
317   FuncStartAddr = *OptFuncStartAddr;
318   return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
319 }
320 
321 llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
322   uint64_t FuncStartAddr = 0;
323   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
324     return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
325   else
326     return ExpectedData.takeError();
327 }
328 
329 llvm::Expected<FunctionInfo>
330 GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
331   uint64_t FuncStartAddr = 0;
332   if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
333     return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
334   else
335     return ExpectedData.takeError();
336 }
337 
338 llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
339   uint64_t FuncStartAddr = 0;
340   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
341     return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
342   else
343     return ExpectedData.takeError();
344 }
345 
346 void GsymReader::dump(raw_ostream &OS) {
347   const auto &Header = getHeader();
348   // Dump the GSYM header.
349   OS << Header << "\n";
350   // Dump the address table.
351   OS << "Address Table:\n";
352   OS << "INDEX  OFFSET";
353 
354   switch (Hdr->AddrOffSize) {
355   case 1: OS << "8 "; break;
356   case 2: OS << "16"; break;
357   case 4: OS << "32"; break;
358   case 8: OS << "64"; break;
359   default: OS << "??"; break;
360   }
361   OS << " (ADDRESS)\n";
362   OS << "====== =============================== \n";
363   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
364     OS << format("[%4u] ", I);
365     switch (Hdr->AddrOffSize) {
366     case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
367     case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
368     case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
369     case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
370     default: break;
371     }
372     OS << " (" << HEX64(*getAddress(I)) << ")\n";
373   }
374   // Dump the address info offsets table.
375   OS << "\nAddress Info Offsets:\n";
376   OS << "INDEX  Offset\n";
377   OS << "====== ==========\n";
378   for (uint32_t I = 0; I < Header.NumAddresses; ++I)
379     OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
380   // Dump the file table.
381   OS << "\nFiles:\n";
382   OS << "INDEX  DIRECTORY  BASENAME   PATH\n";
383   OS << "====== ========== ========== ==============================\n";
384   for (uint32_t I = 0; I < Files.size(); ++I) {
385     OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
386        << HEX32(Files[I].Base) << ' ';
387     dump(OS, getFile(I));
388     OS << "\n";
389   }
390   OS << "\n" << StrTab << "\n";
391 
392   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
393     OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
394     if (auto FI = getFunctionInfoAtIndex(I))
395       dump(OS, *FI);
396     else
397       logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
398   }
399 }
400 
401 void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {
402   OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
403   if (FI.OptLineTable)
404     dump(OS, *FI.OptLineTable);
405   if (FI.Inline)
406     dump(OS, *FI.Inline);
407 }
408 
409 void GsymReader::dump(raw_ostream &OS, const LineTable &LT) {
410   OS << "LineTable:\n";
411   for (auto &LE: LT) {
412     OS << "  " << HEX64(LE.Addr) << ' ';
413     if (LE.File)
414       dump(OS, getFile(LE.File));
415     OS << ':' << LE.Line << '\n';
416   }
417 }
418 
419 void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
420   if (Indent == 0)
421     OS << "InlineInfo:\n";
422   else
423     OS.indent(Indent);
424   OS << II.Ranges << ' ' << getString(II.Name);
425   if (II.CallFile != 0) {
426     if (auto File = getFile(II.CallFile)) {
427       OS << " called from ";
428       dump(OS, File);
429       OS << ':' << II.CallLine;
430     }
431   }
432   OS << '\n';
433   for (const auto &ChildII: II.Children)
434     dump(OS, ChildII, Indent + 2);
435 }
436 
437 void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
438   if (FE) {
439     // IF we have the file from index 0, then don't print anything
440     if (FE->Dir == 0 && FE->Base == 0)
441       return;
442     StringRef Dir = getString(FE->Dir);
443     StringRef Base = getString(FE->Base);
444     if (!Dir.empty()) {
445       OS << Dir;
446       if (Dir.contains('\\') && !Dir.contains('/'))
447         OS << '\\';
448       else
449         OS << '/';
450     }
451     if (!Base.empty()) {
452       OS << Base;
453     }
454     if (!Dir.empty() || !Base.empty())
455       return;
456   }
457   OS << "<invalid-file>";
458 }
459