1 //===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11 
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
14 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
15 #include "llvm/DebugInfo/GSYM/LineTable.h"
16 #include "llvm/DebugInfo/GSYM/LookupResult.h"
17 #include "llvm/DebugInfo/GSYM/StringTable.h"
18 #include <cstdint>
19 #include <tuple>
20 
21 namespace llvm {
22 class raw_ostream;
23 
24 namespace gsym {
25 
26 class GsymReader;
27 /// Function information in GSYM files encodes information for one contiguous
28 /// address range. If a function has discontiguous address ranges, they will
29 /// need to be encoded using multiple FunctionInfo objects.
30 ///
31 /// ENCODING
32 ///
33 /// The function information gets the function start address as an argument
34 /// to the FunctionInfo::decode(...) function. This information is calculated
35 /// from the GSYM header and an address offset from the GSYM address offsets
36 /// table. The encoded FunctionInfo information must be aligned to a 4 byte
37 /// boundary.
38 ///
39 /// The encoded data for a FunctionInfo starts with fixed data that all
40 /// function info objects have:
41 ///
42 /// ENCODING  NAME        DESCRIPTION
43 /// ========= =========== ====================================================
44 /// uint32_t  Size        The size in bytes of this function.
45 /// uint32_t  Name        The string table offset of the function name.
46 ///
47 /// The optional data in a FunctionInfo object follows this fixed information
48 /// and consists of a stream of tuples that consist of:
49 ///
50 /// ENCODING  NAME        DESCRIPTION
51 /// ========= =========== ====================================================
52 /// uint32_t  InfoType    An "InfoType" enumeration that describes the type
53 ///                       of optional data that is encoded.
54 /// uint32_t  InfoLength  The size in bytes of the encoded data that
55 ///                       immediately follows this length if this value is
56 ///                       greater than zero.
57 /// uint8_t[] InfoData    Encoded bytes that represent the data for the
58 ///                       "InfoType". These bytes are only present if
59 ///                       "InfoLength" is greater than zero.
60 ///
61 /// The "InfoType" is an enumeration:
62 ///
63 ///   enum InfoType {
64 ///     EndOfList = 0u,
65 ///     LineTableInfo = 1u,
66 ///     InlineInfo = 2u
67 ///   };
68 ///
69 /// This stream of tuples is terminated by a "InfoType" whose value is
70 /// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
71 /// the optional information list. This format allows us to add new optional
72 /// information data to a FunctionInfo object over time and allows older
73 /// clients to still parse the format and skip over any data that they don't
74 /// understand or want to parse.
75 ///
/// So the function information encoding essentially looks like:
77 ///
78 /// struct {
79 ///   uint32_t Size;
80 ///   uint32_t Name;
81 ///   struct {
82 ///     uint32_t InfoType;
83 ///     uint32_t InfoLength;
84 ///     uint8_t InfoData[InfoLength];
85 ///   }[N];
86 /// }
87 ///
88 /// Where "N" is the number of tuples.
89 struct FunctionInfo {
90   AddressRange Range;
91   uint32_t Name; ///< String table offset in the string table.
92   std::optional<LineTable> OptLineTable;
93   std::optional<InlineInfo> Inline;
94   /// If we encode a FunctionInfo during segmenting so we know its size, we can
95   /// cache that encoding here so we don't need to re-encode it when saving the
96   /// GSYM file.
97   SmallString<32> EncodingCache;
98 
99   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
100       : Range(Addr, Addr + Size), Name(N) {}
101 
102   /// Query if a FunctionInfo has rich debug info.
103   ///
104   /// \returns A bool that indicates if this object has something else than
105   /// range and name. When converting information from a symbol table and from
106   /// debug info, we might end up with multiple FunctionInfo objects for the
107   /// same range and we need to be able to tell which one is the better object
108   /// to use.
109   bool hasRichInfo() const { return OptLineTable || Inline; }
110 
111   /// Query if a FunctionInfo object is valid.
112   ///
113   /// Address and size can be zero and there can be no line entries for a
114   /// symbol so the only indication this entry is valid is if the name is
115   /// not zero. This can happen when extracting information from symbol
116   /// tables that do not encode symbol sizes. In that case only the
117   /// address and name will be filled in.
118   ///
119   /// \returns A boolean indicating if this FunctionInfo is valid.
120   bool isValid() const {
121     return Name != 0;
122   }
123 
124   /// Decode an object from a binary data stream.
125   ///
126   /// \param Data The binary stream to read the data from. This object must
127   /// have the data for the object starting at offset zero. The data
128   /// can contain more data than needed.
129   ///
130   /// \param BaseAddr The FunctionInfo's start address and will be used as the
131   /// base address when decoding any contained information like the line table
132   /// and the inline info.
133   ///
134   /// \returns An FunctionInfo or an error describing the issue that was
135   /// encountered during decoding.
136   static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
137                                              uint64_t BaseAddr);
138 
139   /// Encode this object into FileWriter stream.
140   ///
141   /// \param O The binary stream to write the data to at the current file
142   /// position.
143   ///
144   /// \returns An error object that indicates failure or the offset of the
145   /// function info that was successfully written into the stream.
146   llvm::Expected<uint64_t> encode(FileWriter &O) const;
147 
148   /// Encode this function info into the internal byte cache and return the size
149   /// in bytes.
150   ///
151   /// When segmenting GSYM files we need to know how big each FunctionInfo will
152   /// encode into so we can generate segments of the right size. We don't want
153   /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
154   /// and re-use then when calling FunctionInfo::encode(...).
155   ///
156   /// \returns The size in bytes of the FunctionInfo if it were to be encoded
157   /// into a byte stream.
158   uint64_t cacheEncoding();
159 
160   /// Lookup an address within a FunctionInfo object's data stream.
161   ///
162   /// Instead of decoding an entire FunctionInfo object when doing lookups,
163   /// we can decode only the information we need from the FunctionInfo's data
164   /// for the specific address. The lookup result information is returned as
165   /// a LookupResult.
166   ///
167   /// \param Data The binary stream to read the data from. This object must
168   /// have the data for the object starting at offset zero. The data
169   /// can contain more data than needed.
170   ///
171   /// \param GR The GSYM reader that contains the string and file table that
172   /// will be used to fill in information in the returned result.
173   ///
174   /// \param FuncAddr The function start address decoded from the GsymReader.
175   ///
176   /// \param Addr The address to lookup.
177   ///
178   /// \returns An LookupResult or an error describing the issue that was
179   /// encountered during decoding. An error should only be returned if the
180   /// address is not contained in the FunctionInfo or if the data is corrupted.
181   static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
182                                              const GsymReader &GR,
183                                              uint64_t FuncAddr,
184                                              uint64_t Addr);
185 
186   uint64_t startAddress() const { return Range.start(); }
187   uint64_t endAddress() const { return Range.end(); }
188   uint64_t size() const { return Range.size(); }
189 
190   void clear() {
191     Range = {0, 0};
192     Name = 0;
193     OptLineTable = std::nullopt;
194     Inline = std::nullopt;
195   }
196 };
197 
198 inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
199   return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
200          LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
201 }
202 inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
203   return !(LHS == RHS);
204 }
205 /// This sorting will order things consistently by address range first, but then
206 /// followed by inlining being valid and line tables. We might end up with a
207 /// FunctionInfo from debug info that will have the same range as one from the
208 /// symbol table, but we want to quickly be able to sort and use the best version
209 /// when creating the final GSYM file.
210 inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
211   // First sort by address range
212   if (LHS.Range != RHS.Range)
213     return LHS.Range < RHS.Range;
214 
215   // Then sort by inline
216   if (LHS.Inline.has_value() != RHS.Inline.has_value())
217     return RHS.Inline.has_value();
218 
219   return LHS.OptLineTable < RHS.OptLineTable;
220 }
221 
/// Stream insertion operator that writes a FunctionInfo to a raw_ostream.
///
/// Only the declaration is visible in this header; the output format is
/// determined by the out-of-line definition.
raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
223 
224 } // namespace gsym
225 } // namespace llvm
226 
227 #endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
228