1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
23 GsymCreator::GsymCreator(bool Quiet)
24     : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
25   insertFile(StringRef());
26 }
27 
28 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
29   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
30   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
31   // We must insert the strings first, then call the FileEntry constructor.
32   // If we inline the insertString() function call into the constructor, the
33   // call order is undefined due to parameter lists not having any ordering
34   // requirements.
35   const uint32_t Dir = insertString(directory);
36   const uint32_t Base = insertString(filename);
37   FileEntry FE(Dir, Base);
38 
39   std::lock_guard<std::mutex> Guard(Mutex);
40   const auto NextIndex = Files.size();
41   // Find FE in hash map and insert if not present.
42   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
43   if (R.second)
44     Files.emplace_back(FE);
45   return R.first->second;
46 }
47 
48 llvm::Error GsymCreator::save(StringRef Path,
49                               llvm::support::endianness ByteOrder) const {
50   std::error_code EC;
51   raw_fd_ostream OutStrm(Path, EC);
52   if (EC)
53     return llvm::errorCodeToError(EC);
54   FileWriter O(OutStrm, ByteOrder);
55   return encode(O);
56 }
57 
58 llvm::Error GsymCreator::encode(FileWriter &O) const {
59   std::lock_guard<std::mutex> Guard(Mutex);
60   if (Funcs.empty())
61     return createStringError(std::errc::invalid_argument,
62                              "no functions to encode");
63   if (!Finalized)
64     return createStringError(std::errc::invalid_argument,
65                              "GsymCreator wasn't finalized prior to encoding");
66 
67   if (Funcs.size() > UINT32_MAX)
68     return createStringError(std::errc::invalid_argument,
69                              "too many FunctionInfos");
70 
71   const uint64_t MinAddr =
72       BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73   const uint64_t MaxAddr = Funcs.back().startAddress();
74   const uint64_t AddrDelta = MaxAddr - MinAddr;
75   Header Hdr;
76   Hdr.Magic = GSYM_MAGIC;
77   Hdr.Version = GSYM_VERSION;
78   Hdr.AddrOffSize = 0;
79   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80   Hdr.BaseAddress = MinAddr;
81   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82   Hdr.StrtabOffset = 0; // We will fix this up later.
83   Hdr.StrtabSize = 0;   // We will fix this up later.
84   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85   if (UUID.size() > sizeof(Hdr.UUID))
86     return createStringError(std::errc::invalid_argument,
87                              "invalid UUID size %u", (uint32_t)UUID.size());
88   // Set the address offset size correctly in the GSYM header.
89   if (AddrDelta <= UINT8_MAX)
90     Hdr.AddrOffSize = 1;
91   else if (AddrDelta <= UINT16_MAX)
92     Hdr.AddrOffSize = 2;
93   else if (AddrDelta <= UINT32_MAX)
94     Hdr.AddrOffSize = 4;
95   else
96     Hdr.AddrOffSize = 8;
97   // Copy the UUID value if we have one.
98   if (UUID.size() > 0)
99     memcpy(Hdr.UUID, UUID.data(), UUID.size());
100   // Write out the header.
101   llvm::Error Err = Hdr.encode(O);
102   if (Err)
103     return Err;
104 
105   // Write out the address offsets.
106   O.alignTo(Hdr.AddrOffSize);
107   for (const auto &FuncInfo : Funcs) {
108     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109     switch (Hdr.AddrOffSize) {
110     case 1:
111       O.writeU8(static_cast<uint8_t>(AddrOffset));
112       break;
113     case 2:
114       O.writeU16(static_cast<uint16_t>(AddrOffset));
115       break;
116     case 4:
117       O.writeU32(static_cast<uint32_t>(AddrOffset));
118       break;
119     case 8:
120       O.writeU64(AddrOffset);
121       break;
122     }
123   }
124 
125   // Write out all zeros for the AddrInfoOffsets.
126   O.alignTo(4);
127   const off_t AddrInfoOffsetsOffset = O.tell();
128   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
129     O.writeU32(0);
130 
131   // Write out the file table
132   O.alignTo(4);
133   assert(!Files.empty());
134   assert(Files[0].Dir == 0);
135   assert(Files[0].Base == 0);
136   size_t NumFiles = Files.size();
137   if (NumFiles > UINT32_MAX)
138     return createStringError(std::errc::invalid_argument, "too many files");
139   O.writeU32(static_cast<uint32_t>(NumFiles));
140   for (auto File : Files) {
141     O.writeU32(File.Dir);
142     O.writeU32(File.Base);
143   }
144 
145   // Write out the sting table.
146   const off_t StrtabOffset = O.tell();
147   StrTab.write(O.get_stream());
148   const off_t StrtabSize = O.tell() - StrtabOffset;
149   std::vector<uint32_t> AddrInfoOffsets;
150 
151   // Write out the address infos for each function info.
152   for (const auto &FuncInfo : Funcs) {
153     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
154       AddrInfoOffsets.push_back(OffsetOrErr.get());
155     else
156       return OffsetOrErr.takeError();
157   }
158   // Fixup the string table offset and size in the header
159   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
160   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
161 
162   // Fixup all address info offsets
163   uint64_t Offset = 0;
164   for (auto AddrInfoOffset : AddrInfoOffsets) {
165     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
166     Offset += 4;
167   }
168   return ErrorSuccess();
169 }
170 
// Similar to std::remove_if, but the predicate is binary: it is passed the
// most recently kept element and the current element, in that order. The
// current element is removed when Pred(kept, current) returns true. The first
// element of the range is always kept.
//
// Kept elements are moved towards the front of the range, preserving their
// relative order. Returns an iterator to the new logical end; the elements
// from there to LastIt are left in a moved-from or unspecified state and are
// expected to be erased by the caller.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return FirstIt;

  // Phase 1: walk the prefix in which nothing is removed. KeptIt always
  // refers to the last element that stays in the result.
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt && !Pred(*KeptIt, *CurrIt); ++CurrIt)
    KeptIt = CurrIt;
  if (CurrIt == LastIt)
    return LastIt;

  // Phase 2: CurrIt is the first removed element and becomes the first slot
  // to be overwritten. KeptIt intentionally stays on the element before it so
  // that the next comparison is made against a kept element rather than
  // against the one that was just removed.
  ForwardIt OutIt = CurrIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    if (Pred(*KeptIt, *CurrIt))
      continue;
    *OutIt = std::move(*CurrIt);
    KeptIt = OutIt;
    ++OutIt;
  }
  return OutIt;
}
190 
191 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
192   std::lock_guard<std::mutex> Guard(Mutex);
193   if (Finalized)
194     return createStringError(std::errc::invalid_argument, "already finalized");
195   Finalized = true;
196 
197   // Sort function infos so we can emit sorted functions.
198   llvm::sort(Funcs);
199 
200   // Don't let the string table indexes change by finalizing in order.
201   StrTab.finalizeInOrder();
202 
203   // Remove duplicates function infos that have both entries from debug info
204   // (DWARF or Breakpad) and entries from the SymbolTable.
205   //
206   // Also handle overlapping function. Usually there shouldn't be any, but they
207   // can and do happen in some rare cases.
208   //
209   // (a)          (b)         (c)
210   //     ^  ^       ^            ^
211   //     |X |Y      |X ^         |X
212   //     |  |       |  |Y        |  ^
213   //     |  |       |  v         v  |Y
214   //     v  v       v               v
215   //
216   // In (a) and (b), Y is ignored and X will be reported for the full range.
217   // In (c), both functions will be included in the result and lookups for an
218   // address in the intersection will return Y because of binary search.
219   //
220   // Note that in case of (b), we cannot include Y in the result because then
221   // we wouldn't find any function for range (end of Y, end of X)
222   // with binary search
223   auto NumBefore = Funcs.size();
224   Funcs.erase(
225       removeIfBinary(Funcs.begin(), Funcs.end(),
226                      [&](const auto &Prev, const auto &Curr) {
227                        // Empty ranges won't intersect, but we still need to
228                        // catch the case where we have multiple symbols at the
229                        // same address and coalesce them.
230                        const bool ranges_equal = Prev.Range == Curr.Range;
231                        if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
232                          // Overlapping ranges or empty identical ranges.
233                          if (ranges_equal) {
234                            // Same address range. Check if one is from debug
235                            // info and the other is from a symbol table. If
236                            // so, then keep the one with debug info. Our
237                            // sorting guarantees that entries with matching
238                            // address ranges that have debug info are last in
239                            // the sort.
240                            if (Prev == Curr) {
241                              // FunctionInfo entries match exactly (range,
242                              // lines, inlines)
243 
244                              // We used to output a warning here, but this was
245                              // so frequent on some binaries, in particular
246                              // when those were built with GCC, that it slowed
247                              // down processing extremely.
248                              return true;
249                            } else {
250                              if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
251                                // Same address range, one with no debug info
252                                // (symbol) and the next with debug info. Keep
253                                // the latter.
254                                return true;
255                              } else {
256                                if (!Quiet) {
257                                  OS << "warning: same address range contains "
258                                        "different debug "
259                                     << "info. Removing:\n"
260                                     << Prev << "\nIn favor of this one:\n"
261                                     << Curr << "\n";
262                                }
263                                return true;
264                              }
265                            }
266                          } else {
267                            if (!Quiet) { // print warnings about overlaps
268                              OS << "warning: function ranges overlap:\n"
269                                 << Prev << "\n"
270                                 << Curr << "\n";
271                            }
272                          }
273                        } else if (Prev.Range.size() == 0 &&
274                                   Curr.Range.contains(Prev.Range.start())) {
275                          if (!Quiet) {
276                            OS << "warning: removing symbol:\n"
277                               << Prev << "\nKeeping:\n"
278                               << Curr << "\n";
279                          }
280                          return true;
281                        }
282 
283                        return false;
284                      }),
285       Funcs.end());
286 
287   // If our last function info entry doesn't have a size and if we have valid
288   // text ranges, we should set the size of the last entry since any search for
289   // a high address might match our last entry. By fixing up this size, we can
290   // help ensure we don't cause lookups to always return the last symbol that
291   // has no size when doing lookups.
292   if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
293     if (auto Range =
294             ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
295       Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
296     }
297   }
298   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
299      << Funcs.size() << " total\n";
300   return Error::success();
301 }
302 
303 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
304   if (S.empty())
305     return 0;
306 
307   // The hash can be calculated outside the lock.
308   CachedHashStringRef CHStr(S);
309   std::lock_guard<std::mutex> Guard(Mutex);
310   if (Copy) {
311     // We need to provide backing storage for the string if requested
312     // since StringTableBuilder stores references to strings. Any string
313     // that comes from a section in an object file doesn't need to be
314     // copied, but any string created by code will need to be copied.
315     // This allows GsymCreator to be really fast when parsing DWARF and
316     // other object files as most strings don't need to be copied.
317     if (!StrTab.contains(CHStr))
318       CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
319                                   CHStr.hash()};
320   }
321   return StrTab.add(CHStr);
322 }
323 
324 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
325   std::lock_guard<std::mutex> Guard(Mutex);
326   Ranges.insert(FI.Range);
327   Funcs.emplace_back(std::move(FI));
328 }
329 
330 void GsymCreator::forEachFunctionInfo(
331     std::function<bool(FunctionInfo &)> const &Callback) {
332   std::lock_guard<std::mutex> Guard(Mutex);
333   for (auto &FI : Funcs) {
334     if (!Callback(FI))
335       break;
336   }
337 }
338 
339 void GsymCreator::forEachFunctionInfo(
340     std::function<bool(const FunctionInfo &)> const &Callback) const {
341   std::lock_guard<std::mutex> Guard(Mutex);
342   for (const auto &FI : Funcs) {
343     if (!Callback(FI))
344       break;
345   }
346 }
347 
348 size_t GsymCreator::getNumFunctionInfos() const {
349   std::lock_guard<std::mutex> Guard(Mutex);
350   return Funcs.size();
351 }
352 
353 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
354   if (ValidTextRanges)
355     return ValidTextRanges->contains(Addr);
356   return true; // No valid text ranges has been set, so accept all ranges.
357 }
358 
359 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
360   std::lock_guard<std::mutex> Guard(Mutex);
361   return Ranges.contains(Addr);
362 }
363