1 //===- GsymReader.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/GSYM/GsymReader.h"
10 
11 #include <assert.h>
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 
16 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
17 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
18 #include "llvm/DebugInfo/GSYM/LineTable.h"
19 #include "llvm/Support/BinaryStreamReader.h"
20 #include "llvm/Support/DataExtractor.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 
23 using namespace llvm;
24 using namespace gsym;
25 
26 GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) :
27     MemBuffer(std::move(Buffer)),
28     Endian(support::endian::system_endianness()) {}
29 
  // Move construction uses the compiler generated member-wise move.
  GsymReader::GsymReader(GsymReader &&RHS) = default;
31 
// The destructor is defaulted here, in the implementation file, rather than
// in the class definition.
GsymReader::~GsymReader() = default;
33 
34 llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
35   // Open the input file and return an appropriate error if needed.
36   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
37       MemoryBuffer::getFileOrSTDIN(Filename);
38   auto Err = BuffOrErr.getError();
39   if (Err)
40     return llvm::errorCodeToError(Err);
41   return create(BuffOrErr.get());
42 }
43 
44 llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
45   auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
46   return create(MemBuffer);
47 }
48 
49 llvm::Expected<llvm::gsym::GsymReader>
50 GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
51   if (!MemBuffer)
52     return createStringError(std::errc::invalid_argument,
53                              "invalid memory buffer");
54   GsymReader GR(std::move(MemBuffer));
55   llvm::Error Err = GR.parse();
56   if (Err)
57     return std::move(Err);
58   return std::move(GR);
59 }
60 
61 llvm::Error
62 GsymReader::parse() {
63   BinaryStreamReader FileData(MemBuffer->getBuffer(),
64                               support::endian::system_endianness());
65   // Check for the magic bytes. This file format is designed to be mmap'ed
66   // into a process and accessed as read only. This is done for performance
67   // and efficiency for symbolicating and parsing GSYM data.
68   if (FileData.readObject(Hdr))
69     return createStringError(std::errc::invalid_argument,
70                              "not enough data for a GSYM header");
71 
72   const auto HostByteOrder = support::endian::system_endianness();
73   switch (Hdr->Magic) {
74     case GSYM_MAGIC:
75       Endian = HostByteOrder;
76       break;
77     case GSYM_CIGAM:
78       // This is a GSYM file, but not native endianness.
79       Endian = sys::IsBigEndianHost ? support::little : support::big;
80       Swap.reset(new SwappedData);
81       break;
82     default:
83       return createStringError(std::errc::invalid_argument,
84                                "not a GSYM file");
85   }
86 
87   bool DataIsLittleEndian = HostByteOrder != support::little;
88   // Read a correctly byte swapped header if we need to.
89   if (Swap) {
90     DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
91     if (auto ExpectedHdr = Header::decode(Data))
92       Swap->Hdr = ExpectedHdr.get();
93     else
94       return ExpectedHdr.takeError();
95     Hdr = &Swap->Hdr;
96   }
97 
98   // Detect errors in the header and report any that are found. If we make it
99   // past this without errors, we know we have a good magic value, a supported
100   // version number, verified address offset size and a valid UUID size.
101   if (Error Err = Hdr->checkForError())
102     return Err;
103 
104   if (!Swap) {
105     // This is the native endianness case that is most common and optimized for
106     // efficient lookups. Here we just grab pointers to the native data and
107     // use ArrayRef objects to allow efficient read only access.
108 
109     // Read the address offsets.
110     if (FileData.padToAlignment(Hdr->AddrOffSize) ||
111         FileData.readArray(AddrOffsets,
112                            Hdr->NumAddresses * Hdr->AddrOffSize))
113       return createStringError(std::errc::invalid_argument,
114                               "failed to read address table");
115 
116     // Read the address info offsets.
117     if (FileData.padToAlignment(4) ||
118         FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
119       return createStringError(std::errc::invalid_argument,
120                               "failed to read address info offsets table");
121 
122     // Read the file table.
123     uint32_t NumFiles = 0;
124     if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
125       return createStringError(std::errc::invalid_argument,
126                               "failed to read file table");
127 
128     // Get the string table.
129     FileData.setOffset(Hdr->StrtabOffset);
130     if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
131       return createStringError(std::errc::invalid_argument,
132                               "failed to read string table");
133 } else {
134   // This is the non native endianness case that is not common and not
135   // optimized for lookups. Here we decode the important tables into local
136   // storage and then set the ArrayRef objects to point to these swapped
137   // copies of the read only data so lookups can be as efficient as possible.
138   DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
139 
140   // Read the address offsets.
141   uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
142   Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
143   switch (Hdr->AddrOffSize) {
144     case 1:
145       if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
146         return createStringError(std::errc::invalid_argument,
147                                   "failed to read address table");
148       break;
149     case 2:
150       if (!Data.getU16(&Offset,
151                         reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
152                         Hdr->NumAddresses))
153         return createStringError(std::errc::invalid_argument,
154                                   "failed to read address table");
155       break;
156     case 4:
157       if (!Data.getU32(&Offset,
158                         reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
159                         Hdr->NumAddresses))
160         return createStringError(std::errc::invalid_argument,
161                                   "failed to read address table");
162       break;
163     case 8:
164       if (!Data.getU64(&Offset,
165                         reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
166                         Hdr->NumAddresses))
167         return createStringError(std::errc::invalid_argument,
168                                   "failed to read address table");
169     }
170     AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
171 
172     // Read the address info offsets.
173     Offset = alignTo(Offset, 4);
174     Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
175     if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
176       AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
177     else
178       return createStringError(std::errc::invalid_argument,
179                                "failed to read address table");
180     // Read the file table.
181     const uint32_t NumFiles = Data.getU32(&Offset);
182     if (NumFiles > 0) {
183       Swap->Files.resize(NumFiles);
184       if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
185         Files = ArrayRef<FileEntry>(Swap->Files);
186       else
187         return createStringError(std::errc::invalid_argument,
188                                  "failed to read file table");
189     }
190     // Get the string table.
191     StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
192                                                 Hdr->StrtabSize);
193     if (StrTab.Data.empty())
194       return createStringError(std::errc::invalid_argument,
195                                "failed to read string table");
196   }
197   return Error::success();
198 
199 }
200 
201 const Header &GsymReader::getHeader() const {
202   // The only way to get a GsymReader is from GsymReader::openFile(...) or
203   // GsymReader::copyBuffer() and the header must be valid and initialized to
204   // a valid pointer value, so the assert below should not trigger.
205   assert(Hdr);
206   return *Hdr;
207 }
208 
209 std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
210   switch (Hdr->AddrOffSize) {
211   case 1: return addressForIndex<uint8_t>(Index);
212   case 2: return addressForIndex<uint16_t>(Index);
213   case 4: return addressForIndex<uint32_t>(Index);
214   case 8: return addressForIndex<uint64_t>(Index);
215   }
216   return std::nullopt;
217 }
218 
219 std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
220   const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
221   if (Index < NumAddrInfoOffsets)
222     return AddrInfoOffsets[Index];
223   return std::nullopt;
224 }
225 
226 Expected<uint64_t>
227 GsymReader::getAddressIndex(const uint64_t Addr) const {
228   if (Addr >= Hdr->BaseAddress) {
229     const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
230     std::optional<uint64_t> AddrOffsetIndex;
231     switch (Hdr->AddrOffSize) {
232     case 1:
233       AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
234       break;
235     case 2:
236       AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
237       break;
238     case 4:
239       AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
240       break;
241     case 8:
242       AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
243       break;
244     default:
245       return createStringError(std::errc::invalid_argument,
246                                "unsupported address offset size %u",
247                                Hdr->AddrOffSize);
248     }
249     if (AddrOffsetIndex)
250       return *AddrOffsetIndex;
251   }
252   return createStringError(std::errc::invalid_argument,
253                            "address 0x%" PRIx64 " is not in GSYM", Addr);
254 
255 }
256 
257 llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
258   Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
259   if (!AddressIndex)
260     return AddressIndex.takeError();
261   // Address info offsets size should have been checked in parse().
262   assert(*AddressIndex < AddrInfoOffsets.size());
263   auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
264   DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
265   if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex)) {
266     auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
267     if (ExpectedFI) {
268       if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
269         return ExpectedFI;
270       return createStringError(std::errc::invalid_argument,
271                                 "address 0x%" PRIx64 " is not in GSYM", Addr);
272     }
273   }
274   return createStringError(std::errc::invalid_argument,
275                            "failed to extract address[%" PRIu64 "]",
276                            *AddressIndex);
277 }
278 
279 llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
280   Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
281   if (!AddressIndex)
282     return AddressIndex.takeError();
283   // Address info offsets size should have been checked in parse().
284   assert(*AddressIndex < AddrInfoOffsets.size());
285   auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
286   DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
287   if (std::optional<uint64_t> OptAddr = getAddress(*AddressIndex))
288     return FunctionInfo::lookup(Data, *this, *OptAddr, Addr);
289   return createStringError(std::errc::invalid_argument,
290                            "failed to extract address[%" PRIu64 "]",
291                            *AddressIndex);
292 }
293 
294 void GsymReader::dump(raw_ostream &OS) {
295   const auto &Header = getHeader();
296   // Dump the GSYM header.
297   OS << Header << "\n";
298   // Dump the address table.
299   OS << "Address Table:\n";
300   OS << "INDEX  OFFSET";
301 
302   switch (Hdr->AddrOffSize) {
303   case 1: OS << "8 "; break;
304   case 2: OS << "16"; break;
305   case 4: OS << "32"; break;
306   case 8: OS << "64"; break;
307   default: OS << "??"; break;
308   }
309   OS << " (ADDRESS)\n";
310   OS << "====== =============================== \n";
311   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
312     OS << format("[%4u] ", I);
313     switch (Hdr->AddrOffSize) {
314     case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
315     case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
316     case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
317     case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
318     default: break;
319     }
320     OS << " (" << HEX64(*getAddress(I)) << ")\n";
321   }
322   // Dump the address info offsets table.
323   OS << "\nAddress Info Offsets:\n";
324   OS << "INDEX  Offset\n";
325   OS << "====== ==========\n";
326   for (uint32_t I = 0; I < Header.NumAddresses; ++I)
327     OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
328   // Dump the file table.
329   OS << "\nFiles:\n";
330   OS << "INDEX  DIRECTORY  BASENAME   PATH\n";
331   OS << "====== ========== ========== ==============================\n";
332   for (uint32_t I = 0; I < Files.size(); ++I) {
333     OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
334        << HEX32(Files[I].Base) << ' ';
335     dump(OS, getFile(I));
336     OS << "\n";
337   }
338   OS << "\n" << StrTab << "\n";
339 
340   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
341     OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
342     if (auto FI = getFunctionInfo(*getAddress(I)))
343       dump(OS, *FI);
344     else
345       logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
346   }
347 }
348 
349 void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {
350   OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
351   if (FI.OptLineTable)
352     dump(OS, *FI.OptLineTable);
353   if (FI.Inline)
354     dump(OS, *FI.Inline);
355 }
356 
357 void GsymReader::dump(raw_ostream &OS, const LineTable &LT) {
358   OS << "LineTable:\n";
359   for (auto &LE: LT) {
360     OS << "  " << HEX64(LE.Addr) << ' ';
361     if (LE.File)
362       dump(OS, getFile(LE.File));
363     OS << ':' << LE.Line << '\n';
364   }
365 }
366 
367 void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
368   if (Indent == 0)
369     OS << "InlineInfo:\n";
370   else
371     OS.indent(Indent);
372   OS << II.Ranges << ' ' << getString(II.Name);
373   if (II.CallFile != 0) {
374     if (auto File = getFile(II.CallFile)) {
375       OS << " called from ";
376       dump(OS, File);
377       OS << ':' << II.CallLine;
378     }
379   }
380   OS << '\n';
381   for (const auto &ChildII: II.Children)
382     dump(OS, ChildII, Indent + 2);
383 }
384 
385 void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
386   if (FE) {
387     // IF we have the file from index 0, then don't print anything
388     if (FE->Dir == 0 && FE->Base == 0)
389       return;
390     StringRef Dir = getString(FE->Dir);
391     StringRef Base = getString(FE->Base);
392     if (!Dir.empty()) {
393       OS << Dir;
394       if (Dir.contains('\\') && !Dir.contains('/'))
395         OS << '\\';
396       else
397         OS << '/';
398     }
399     if (!Base.empty()) {
400       OS << Base;
401     }
402     if (!Dir.empty() || !Base.empty())
403       return;
404   }
405   OS << "<invalid-file>";
406 }
407