1 //===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11 
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
14 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
15 #include "llvm/DebugInfo/GSYM/LineTable.h"
16 #include "llvm/DebugInfo/GSYM/LookupResult.h"
17 #include "llvm/DebugInfo/GSYM/StringTable.h"
18 #include <cstdint>
19 
20 namespace llvm {
21 class raw_ostream;
22 
23 namespace gsym {
24 
25 class GsymReader;
26 /// Function information in GSYM files encodes information for one contiguous
27 /// address range. If a function has discontiguous address ranges, they will
28 /// need to be encoded using multiple FunctionInfo objects.
29 ///
30 /// ENCODING
31 ///
32 /// The function information gets the function start address as an argument
33 /// to the FunctionInfo::decode(...) function. This information is calculated
34 /// from the GSYM header and an address offset from the GSYM address offsets
35 /// table. The encoded FunctionInfo information must be aligned to a 4 byte
36 /// boundary.
37 ///
38 /// The encoded data for a FunctionInfo starts with fixed data that all
39 /// function info objects have:
40 ///
41 /// ENCODING  NAME        DESCRIPTION
42 /// ========= =========== ====================================================
43 /// uint32_t  Size        The size in bytes of this function.
44 /// uint32_t  Name        The string table offset of the function name.
45 ///
46 /// The optional data in a FunctionInfo object follows this fixed information
47 /// and consists of a stream of tuples that consist of:
48 ///
49 /// ENCODING  NAME        DESCRIPTION
50 /// ========= =========== ====================================================
51 /// uint32_t  InfoType    An "InfoType" enumeration that describes the type
52 ///                       of optional data that is encoded.
53 /// uint32_t  InfoLength  The size in bytes of the encoded data that
54 ///                       immediately follows this length if this value is
55 ///                       greater than zero.
56 /// uint8_t[] InfoData    Encoded bytes that represent the data for the
57 ///                       "InfoType". These bytes are only present if
58 ///                       "InfoLength" is greater than zero.
59 ///
60 /// The "InfoType" is an enumeration:
61 ///
62 ///   enum InfoType {
63 ///     EndOfList = 0u,
64 ///     LineTableInfo = 1u,
65 ///     InlineInfo = 2u
66 ///   };
67 ///
68 /// This stream of tuples is terminated by a "InfoType" whose value is
69 /// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
70 /// the optional information list. This format allows us to add new optional
71 /// information data to a FunctionInfo object over time and allows older
72 /// clients to still parse the format and skip over any data that they don't
73 /// understand or want to parse.
74 ///
75 /// So the function information encoding essientially looks like:
76 ///
77 /// struct {
78 ///   uint32_t Size;
79 ///   uint32_t Name;
80 ///   struct {
81 ///     uint32_t InfoType;
82 ///     uint32_t InfoLength;
83 ///     uint8_t InfoData[InfoLength];
84 ///   }[N];
85 /// }
86 ///
87 /// Where "N" is the number of tuples.
88 struct FunctionInfo {
89   AddressRange Range;
90   uint32_t Name; ///< String table offset in the string table.
91   std::optional<LineTable> OptLineTable;
92   std::optional<InlineInfo> Inline;
93   /// If we encode a FunctionInfo during segmenting so we know its size, we can
94   /// cache that encoding here so we don't need to re-encode it when saving the
95   /// GSYM file.
96   SmallString<32> EncodingCache;
97 
98   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
99       : Range(Addr, Addr + Size), Name(N) {}
100 
101   /// Query if a FunctionInfo has rich debug info.
102   ///
103   /// \returns A bool that indicates if this object has something else than
104   /// range and name. When converting information from a symbol table and from
105   /// debug info, we might end up with multiple FunctionInfo objects for the
106   /// same range and we need to be able to tell which one is the better object
107   /// to use.
hasRichInfoFunctionInfo108   bool hasRichInfo() const { return OptLineTable || Inline; }
109 
110   /// Query if a FunctionInfo object is valid.
111   ///
112   /// Address and size can be zero and there can be no line entries for a
113   /// symbol so the only indication this entry is valid is if the name is
114   /// not zero. This can happen when extracting information from symbol
115   /// tables that do not encode symbol sizes. In that case only the
116   /// address and name will be filled in.
117   ///
118   /// \returns A boolean indicating if this FunctionInfo is valid.
isValidFunctionInfo119   bool isValid() const {
120     return Name != 0;
121   }
122 
123   /// Decode an object from a binary data stream.
124   ///
125   /// \param Data The binary stream to read the data from. This object must
126   /// have the data for the object starting at offset zero. The data
127   /// can contain more data than needed.
128   ///
129   /// \param BaseAddr The FunctionInfo's start address and will be used as the
130   /// base address when decoding any contained information like the line table
131   /// and the inline info.
132   ///
133   /// \returns An FunctionInfo or an error describing the issue that was
134   /// encountered during decoding.
135   static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
136                                              uint64_t BaseAddr);
137 
138   /// Encode this object into FileWriter stream.
139   ///
140   /// \param O The binary stream to write the data to at the current file
141   /// position.
142   ///
143   /// \returns An error object that indicates failure or the offset of the
144   /// function info that was successfully written into the stream.
145   llvm::Expected<uint64_t> encode(FileWriter &O) const;
146 
147   /// Encode this function info into the internal byte cache and return the size
148   /// in bytes.
149   ///
150   /// When segmenting GSYM files we need to know how big each FunctionInfo will
151   /// encode into so we can generate segments of the right size. We don't want
152   /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
153   /// and re-use then when calling FunctionInfo::encode(...).
154   ///
155   /// \returns The size in bytes of the FunctionInfo if it were to be encoded
156   /// into a byte stream.
157   uint64_t cacheEncoding();
158 
159   /// Lookup an address within a FunctionInfo object's data stream.
160   ///
161   /// Instead of decoding an entire FunctionInfo object when doing lookups,
162   /// we can decode only the information we need from the FunctionInfo's data
163   /// for the specific address. The lookup result information is returned as
164   /// a LookupResult.
165   ///
166   /// \param Data The binary stream to read the data from. This object must
167   /// have the data for the object starting at offset zero. The data
168   /// can contain more data than needed.
169   ///
170   /// \param GR The GSYM reader that contains the string and file table that
171   /// will be used to fill in information in the returned result.
172   ///
173   /// \param FuncAddr The function start address decoded from the GsymReader.
174   ///
175   /// \param Addr The address to lookup.
176   ///
177   /// \returns An LookupResult or an error describing the issue that was
178   /// encountered during decoding. An error should only be returned if the
179   /// address is not contained in the FunctionInfo or if the data is corrupted.
180   static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
181                                              const GsymReader &GR,
182                                              uint64_t FuncAddr,
183                                              uint64_t Addr);
184 
startAddressFunctionInfo185   uint64_t startAddress() const { return Range.start(); }
endAddressFunctionInfo186   uint64_t endAddress() const { return Range.end(); }
sizeFunctionInfo187   uint64_t size() const { return Range.size(); }
188 
clearFunctionInfo189   void clear() {
190     Range = {0, 0};
191     Name = 0;
192     OptLineTable = std::nullopt;
193     Inline = std::nullopt;
194   }
195 };
196 
197 inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
198   return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
199          LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
200 }
201 inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
202   return !(LHS == RHS);
203 }
204 /// This sorting will order things consistently by address range first, but
205 /// then followed by increasing levels of debug info like inline information
206 /// and line tables. We might end up with a FunctionInfo from debug info that
207 /// will have the same range as one from the symbol table, but we want to
208 /// quickly be able to sort and use the best version when creating the final
209 /// GSYM file. This function compares the inline information as we have seen
210 /// cases where LTO can generate a wide array of differing inline information,
211 /// mostly due to messing up the address ranges for inlined functions, so the
212 /// inline information with the most entries will appeear last. If the inline
213 /// information match, either by both function infos not having any or both
214 /// being exactly the same, we will then compare line tables. Comparing line
215 /// tables allows the entry with the most line entries to appear last. This
216 /// ensures we are able to save the FunctionInfo with the most debug info into
217 /// the GSYM file.
218 inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
219   // First sort by address range
220   if (LHS.Range != RHS.Range)
221     return LHS.Range < RHS.Range;
222   if (LHS.Inline == RHS.Inline)
223     return LHS.OptLineTable < RHS.OptLineTable;
224   return LHS.Inline < RHS.Inline;
225 }
226 
/// Stream output operator for a FunctionInfo. Only the declaration appears in
/// this header; the definition, and therefore the exact output format, is not
/// visible here.
raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
228 
229 } // namespace gsym
230 } // namespace llvm
231 
232 #endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
233