1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11 
12 
13 #include "llvm/ADT/ArrayRef.h"
14 #include "llvm/DebugInfo/GSYM/FileEntry.h"
15 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
16 #include "llvm/DebugInfo/GSYM/Header.h"
17 #include "llvm/DebugInfo/GSYM/LineEntry.h"
18 #include "llvm/DebugInfo/GSYM/StringTable.h"
19 #include "llvm/Support/DataExtractor.h"
20 #include "llvm/Support/Endian.h"
21 #include "llvm/Support/ErrorOr.h"
22 
23 #include <inttypes.h>
24 #include <memory>
25 #include <stdint.h>
26 #include <string>
27 #include <vector>
28 
29 namespace llvm {
30 class MemoryBuffer;
31 class raw_ostream;
32 
33 namespace gsym {
34 
35 /// GsymReader is used to read GSYM data from a file or buffer.
36 ///
37 /// This class is optimized for very quick lookups when the endianness matches
38 /// the host system. The Header, address table, address info offsets, and file
39 /// table is designed to be mmap'ed as read only into memory and used without
40 /// any parsing needed. If the endianness doesn't match, we swap these objects
41 /// and tables into GsymReader::SwappedData and then point our header and
42 /// ArrayRefs to this swapped internal data.
43 ///
44 /// GsymReader objects must use one of the static functions to create an
45 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46 
47 class GsymReader {
48   GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
49   llvm::Error parse();
50 
51   std::unique_ptr<MemoryBuffer> MemBuffer;
52   StringRef GsymBytes;
53   llvm::support::endianness Endian;
54   const Header *Hdr = nullptr;
55   ArrayRef<uint8_t> AddrOffsets;
56   ArrayRef<uint32_t> AddrInfoOffsets;
57   ArrayRef<FileEntry> Files;
58   StringTable StrTab;
59   /// When the GSYM file's endianness doesn't match the host system then
60   /// we must decode all data structures that need to be swapped into
61   /// local storage and set point the ArrayRef objects above to these swapped
62   /// copies.
63   struct SwappedData {
64     Header Hdr;
65     std::vector<uint8_t> AddrOffsets;
66     std::vector<uint32_t> AddrInfoOffsets;
67     std::vector<FileEntry> Files;
68   };
69   std::unique_ptr<SwappedData> Swap;
70 
71 public:
72   GsymReader(GsymReader &&RHS);
73   ~GsymReader();
74 
75   /// Construct a GsymReader from a file on disk.
76   ///
77   /// \param Path The file path the GSYM file to read.
78   /// \returns An expected GsymReader that contains the object or an error
79   /// object that indicates reason for failing to read the GSYM.
80   static llvm::Expected<GsymReader> openFile(StringRef Path);
81 
82   /// Construct a GsymReader from a buffer.
83   ///
84   /// \param Bytes A set of bytes that will be copied and owned by the
85   /// returned object on success.
86   /// \returns An expected GsymReader that contains the object or an error
87   /// object that indicates reason for failing to read the GSYM.
88   static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
89 
90   /// Access the GSYM header.
91   /// \returns A native endian version of the GSYM header.
92   const Header &getHeader() const;
93 
94   /// Get the full function info for an address.
95   ///
96   /// This should be called when a client will store a copy of the complete
97   /// FunctionInfo for a given address. For one off lookups, use the lookup()
98   /// function below.
99   ///
100   /// Symbolication server processes might want to parse the entire function
101   /// info for a given address and cache it if the process stays around to
102   /// service many symbolication addresses, like for parsing profiling
103   /// information.
104   ///
105   /// \param Addr A virtual address from the orignal object file to lookup.
106   ///
107   /// \returns An expected FunctionInfo that contains the function info object
108   /// or an error object that indicates reason for failing to lookup the
109   /// address.
110   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
111 
112   /// Lookup an address in the a GSYM.
113   ///
114   /// Lookup just the information needed for a specific address \a Addr. This
115   /// function is faster that calling getFunctionInfo() as it will only return
116   /// information that pertains to \a Addr and allows the parsing to skip any
117   /// extra information encoded for other addresses. For example the line table
118   /// parsing can stop when a matching LineEntry has been fouhnd, and the
119   /// InlineInfo can stop parsing early once a match has been found and also
120   /// skip information that doesn't match. This avoids memory allocations and
121   /// is much faster for lookups.
122   ///
123   /// \param Addr A virtual address from the orignal object file to lookup.
124   /// \returns An expected LookupResult that contains only the information
125   /// needed for the current address, or an error object that indicates reason
126   /// for failing to lookup the address.
127   llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
128 
129   /// Get a string from the string table.
130   ///
131   /// \param Offset The string table offset for the string to retrieve.
132   /// \returns The string from the strin table.
133   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
134 
135   /// Get the a file entry for the suppplied file index.
136   ///
137   /// Used to convert any file indexes in the FunctionInfo data back into
138   /// files. This function can be used for iteration, but is more commonly used
139   /// for random access when doing lookups.
140   ///
141   /// \param Index An index into the file table.
142   /// \returns An optional FileInfo that will be valid if the file index is
143   /// valid, or llvm::None if the file index is out of bounds,
144   Optional<FileEntry> getFile(uint32_t Index) const {
145     if (Index < Files.size())
146       return Files[Index];
147     return llvm::None;
148   }
149 
150   /// Dump the entire Gsym data contained in this object.
151   ///
152   /// \param  OS The output stream to dump to.
153   void dump(raw_ostream &OS);
154 
155   /// Dump a FunctionInfo object.
156   ///
157   /// This function will convert any string table indexes and file indexes
158   /// into human readable format.
159   ///
160   /// \param  OS The output stream to dump to.
161   ///
162   /// \param FI The object to dump.
163   void dump(raw_ostream &OS, const FunctionInfo &FI);
164 
165   /// Dump a LineTable object.
166   ///
167   /// This function will convert any string table indexes and file indexes
168   /// into human readable format.
169   ///
170   ///
171   /// \param  OS The output stream to dump to.
172   ///
173   /// \param LT The object to dump.
174   void dump(raw_ostream &OS, const LineTable &LT);
175 
176   /// Dump a InlineInfo object.
177   ///
178   /// This function will convert any string table indexes and file indexes
179   /// into human readable format.
180   ///
181   /// \param  OS The output stream to dump to.
182   ///
183   /// \param II The object to dump.
184   ///
185   /// \param Indent The indentation as number of spaces. Used for recurive
186   /// dumping.
187   void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
188 
189   /// Dump a FileEntry object.
190   ///
191   /// This function will convert any string table indexes into human readable
192   /// format.
193   ///
194   /// \param  OS The output stream to dump to.
195   ///
196   /// \param FE The object to dump.
197   void dump(raw_ostream &OS, Optional<FileEntry> FE);
198 
199   /// Get the number of addresses in this Gsym file.
200   uint32_t getNumAddresses() const {
201     return Hdr->NumAddresses;
202   }
203 
204   /// Gets an address from the address table.
205   ///
206   /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
207   ///
208   /// \param Index A index into the address table.
209   /// \returns A resolved virtual address for adddress in the address table
210   /// or llvm::None if Index is out of bounds.
211   Optional<uint64_t> getAddress(size_t Index) const;
212 
213 protected:
214 
215   /// Get an appropriate address info offsets array.
216   ///
217   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
218   /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
219   /// internally as a array of bytes that are in the correct endianness. When
220   /// we access this table we must get an array that matches those sizes. This
221   /// templatized helper function is used when accessing address offsets in the
222   /// AddrOffsets member variable.
223   ///
224   /// \returns An ArrayRef of an appropriate address offset size.
225   template <class T> ArrayRef<T>
226   getAddrOffsets() const {
227     return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
228                        AddrOffsets.size()/sizeof(T));
229   }
230 
231   /// Get an appropriate address from the address table.
232   ///
233   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
234   /// byte address offsets from the The gsym::Header::BaseAddress. The table is
235   /// stored internally as a array of bytes that are in the correct endianness.
236   /// In order to extract an address from the address table we must access the
237   /// address offset using the correct size and then add it to the BaseAddress
238   /// in the header.
239   ///
240   /// \param Index An index into the AddrOffsets array.
241   /// \returns An virtual address that matches the original object file for the
242   /// address as the specified index, or llvm::None if Index is out of bounds.
243   template <class T> Optional<uint64_t>
244   addressForIndex(size_t Index) const {
245     ArrayRef<T> AIO = getAddrOffsets<T>();
246     if (Index < AIO.size())
247       return AIO[Index] + Hdr->BaseAddress;
248     return llvm::None;
249   }
250   /// Lookup an address offset in the AddrOffsets table.
251   ///
252   /// Given an address offset, look it up using a binary search of the
253   /// AddrOffsets table.
254   ///
255   /// \param AddrOffset An address offset, that has already been computed by
256   /// subtracting the gsym::Header::BaseAddress.
257   /// \returns The matching address offset index. This index will be used to
258   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
259   template <class T>
260   llvm::Optional<uint64_t> getAddressOffsetIndex(const uint64_t AddrOffset) const {
261     ArrayRef<T> AIO = getAddrOffsets<T>();
262     const auto Begin = AIO.begin();
263     const auto End = AIO.end();
264     auto Iter = std::lower_bound(Begin, End, AddrOffset);
265     // Watch for addresses that fall between the gsym::Header::BaseAddress and
266     // the first address offset.
267     if (Iter == Begin && AddrOffset < *Begin)
268       return llvm::None;
269     if (Iter == End || AddrOffset < *Iter)
270       --Iter;
271     return std::distance(Begin, Iter);
272   }
273 
274   /// Create a GSYM from a memory buffer.
275   ///
276   /// Called by both openFile() and copyBuffer(), this function does all of the
277   /// work of parsing the GSYM file and returning an error.
278   ///
279   /// \param MemBuffer A memory buffer that will transfer ownership into the
280   /// GsymReader.
281   /// \returns An expected GsymReader that contains the object or an error
282   /// object that indicates reason for failing to read the GSYM.
283   static llvm::Expected<llvm::gsym::GsymReader>
284   create(std::unique_ptr<MemoryBuffer> &MemBuffer);
285 
286 
287   /// Given an address, find the address index.
288   ///
289   /// Binary search the address table and find the matching address index.
290   ///
291   /// \param Addr A virtual address that matches the original object file
292   /// to lookup.
293   /// \returns An index into the address table. This index can be used to
294   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
295   /// Returns an error if the address isn't in the GSYM with details of why.
296   Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
297 
298   /// Given an address index, get the offset for the FunctionInfo.
299   ///
300   /// Looking up an address is done by finding the corresponding address
301   /// index for the address. This index is then used to get the offset of the
302   /// FunctionInfo data that we will decode using this function.
303   ///
304   /// \param Index An index into the address table.
305   /// \returns An optional GSYM data offset for the offset of the FunctionInfo
306   /// that needs to be decoded.
307   Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
308 };
309 
310 } // namespace gsym
311 } // namespace llvm
312 
313 #endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
314