1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <utility>
#include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
23 
24 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
25   insertFile(StringRef());
26 }
27 
28 uint32_t GsymCreator::insertFile(StringRef Path,
29                                  llvm::sys::path::Style Style) {
30   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
31   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
32   FileEntry FE(insertString(directory), insertString(filename));
33 
34   std::lock_guard<std::recursive_mutex> Guard(Mutex);
35   const auto NextIndex = Files.size();
36   // Find FE in hash map and insert if not present.
37   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
38   if (R.second)
39     Files.emplace_back(FE);
40   return R.first->second;
41 }
42 
43 llvm::Error GsymCreator::save(StringRef Path,
44                               llvm::support::endianness ByteOrder) const {
45   std::error_code EC;
46   raw_fd_ostream OutStrm(Path, EC);
47   if (EC)
48     return llvm::errorCodeToError(EC);
49   FileWriter O(OutStrm, ByteOrder);
50   return encode(O);
51 }
52 
53 llvm::Error GsymCreator::encode(FileWriter &O) const {
54   std::lock_guard<std::recursive_mutex> Guard(Mutex);
55   if (Funcs.empty())
56     return createStringError(std::errc::invalid_argument,
57                              "no functions to encode");
58   if (!Finalized)
59     return createStringError(std::errc::invalid_argument,
60                              "GsymCreator wasn't finalized prior to encoding");
61 
62   if (Funcs.size() > UINT32_MAX)
63     return createStringError(std::errc::invalid_argument,
64                              "too many FunctionInfos");
65   const uint64_t MinAddr = Funcs.front().startAddress();
66   const uint64_t MaxAddr = Funcs.back().startAddress();
67   const uint64_t AddrDelta = MaxAddr - MinAddr;
68   Header Hdr;
69   Hdr.Magic = GSYM_MAGIC;
70   Hdr.Version = GSYM_VERSION;
71   Hdr.AddrOffSize = 0;
72   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
73   Hdr.BaseAddress = MinAddr;
74   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
75   Hdr.StrtabOffset = 0; // We will fix this up later.
76   Hdr.StrtabOffset = 0; // We will fix this up later.
77   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
78   if (UUID.size() > sizeof(Hdr.UUID))
79     return createStringError(std::errc::invalid_argument,
80                              "invalid UUID size %u", (uint32_t)UUID.size());
81   // Set the address offset size correctly in the GSYM header.
82   if (AddrDelta <= UINT8_MAX)
83     Hdr.AddrOffSize = 1;
84   else if (AddrDelta <= UINT16_MAX)
85     Hdr.AddrOffSize = 2;
86   else if (AddrDelta <= UINT32_MAX)
87     Hdr.AddrOffSize = 4;
88   else
89     Hdr.AddrOffSize = 8;
90   // Copy the UUID value if we have one.
91   if (UUID.size() > 0)
92     memcpy(Hdr.UUID, UUID.data(), UUID.size());
93   // Write out the header.
94   llvm::Error Err = Hdr.encode(O);
95   if (Err)
96     return Err;
97 
98   // Write out the address offsets.
99   O.alignTo(Hdr.AddrOffSize);
100   for (const auto &FuncInfo : Funcs) {
101     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
102     switch(Hdr.AddrOffSize) {
103       case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
104       case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
105       case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
106       case 8: O.writeU64(AddrOffset); break;
107     }
108   }
109 
110   // Write out all zeros for the AddrInfoOffsets.
111   O.alignTo(4);
112   const off_t AddrInfoOffsetsOffset = O.tell();
113   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
114     O.writeU32(0);
115 
116   // Write out the file table
117   O.alignTo(4);
118   assert(!Files.empty());
119   assert(Files[0].Dir == 0);
120   assert(Files[0].Base == 0);
121   size_t NumFiles = Files.size();
122   if (NumFiles > UINT32_MAX)
123     return createStringError(std::errc::invalid_argument,
124                              "too many files");
125   O.writeU32(static_cast<uint32_t>(NumFiles));
126   for (auto File: Files) {
127       O.writeU32(File.Dir);
128       O.writeU32(File.Base);
129   }
130 
131   // Write out the sting table.
132   const off_t StrtabOffset = O.tell();
133   StrTab.write(O.get_stream());
134   const off_t StrtabSize = O.tell() - StrtabOffset;
135   std::vector<uint32_t> AddrInfoOffsets;
136 
137   // Write out the address infos for each function info.
138   for (const auto &FuncInfo : Funcs) {
139     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
140         AddrInfoOffsets.push_back(OffsetOrErr.get());
141     else
142         return OffsetOrErr.takeError();
143   }
144   // Fixup the string table offset and size in the header
145   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
146   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
147 
148   // Fixup all address info offsets
149   uint64_t Offset = 0;
150   for (auto AddrInfoOffset: AddrInfoOffsets) {
151     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
152     Offset += 4;
153   }
154   return ErrorSuccess();
155 }
156 
157 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
158   std::lock_guard<std::recursive_mutex> Guard(Mutex);
159   if (Finalized)
160     return createStringError(std::errc::invalid_argument,
161                              "already finalized");
162   Finalized = true;
163 
164   // Sort function infos so we can emit sorted functions.
165   llvm::sort(Funcs.begin(), Funcs.end());
166 
167   // Don't let the string table indexes change by finalizing in order.
168   StrTab.finalizeInOrder();
169 
170   // Remove duplicates function infos that have both entries from debug info
171   // (DWARF or Breakpad) and entries from the SymbolTable.
172   //
173   // Also handle overlapping function. Usually there shouldn't be any, but they
174   // can and do happen in some rare cases.
175   //
176   // (a)          (b)         (c)
177   //     ^  ^       ^            ^
178   //     |X |Y      |X ^         |X
179   //     |  |       |  |Y        |  ^
180   //     |  |       |  v         v  |Y
181   //     v  v       v               v
182   //
183   // In (a) and (b), Y is ignored and X will be reported for the full range.
184   // In (c), both functions will be included in the result and lookups for an
185   // address in the intersection will return Y because of binary search.
186   //
187   // Note that in case of (b), we cannot include Y in the result because then
188   // we wouldn't find any function for range (end of Y, end of X)
189   // with binary search
190   auto NumBefore = Funcs.size();
191   auto Curr = Funcs.begin();
192   auto Prev = Funcs.end();
193   while (Curr != Funcs.end()) {
194     // Can't check for overlaps or same address ranges if we don't have a
195     // previous entry
196     if (Prev != Funcs.end()) {
197       if (Prev->Range.intersects(Curr->Range)) {
198         // Overlapping address ranges.
199         if (Prev->Range == Curr->Range) {
200           // Same address range. Check if one is from debug info and the other
201           // is from a symbol table. If so, then keep the one with debug info.
202           // Our sorting guarantees that entries with matching address ranges
203           // that have debug info are last in the sort.
204           if (*Prev == *Curr) {
205             // FunctionInfo entries match exactly (range, lines, inlines)
206             OS << "warning: duplicate function info entries, removing "
207                   "duplicate:\n"
208                << *Curr << '\n';
209             Curr = Funcs.erase(Prev);
210           } else {
211             if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
212               // Same address range, one with no debug info (symbol) and the
213               // next with debug info. Keep the latter.
214               Curr = Funcs.erase(Prev);
215             } else {
216               OS << "warning: same address range contains different debug "
217                  << "info. Removing:\n"
218                  << *Prev << "\nIn favor of this one:\n"
219                  << *Curr << "\n";
220               Curr = Funcs.erase(Prev);
221             }
222           }
223         } else {
224           // print warnings about overlaps
225           OS << "warning: function ranges overlap:\n"
226              << *Prev << "\n"
227              << *Curr << "\n";
228         }
229       } else if (Prev->Range.size() == 0 &&
230                  Curr->Range.contains(Prev->Range.Start)) {
231         OS << "warning: removing symbol:\n"
232            << *Prev << "\nKeeping:\n"
233            << *Curr << "\n";
234         Curr = Funcs.erase(Prev);
235       }
236     }
237     if (Curr == Funcs.end())
238       break;
239     Prev = Curr++;
240   }
241 
242   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
243      << Funcs.size() << " total\n";
244   return Error::success();
245 }
246 
247 uint32_t GsymCreator::insertString(StringRef S) {
248   std::lock_guard<std::recursive_mutex> Guard(Mutex);
249   if (S.empty())
250     return 0;
251   return StrTab.add(S);
252 }
253 
254 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
255   std::lock_guard<std::recursive_mutex> Guard(Mutex);
256   Funcs.emplace_back(FI);
257 }
258 
259 void GsymCreator::forEachFunctionInfo(
260     std::function<bool(FunctionInfo &)> const &Callback) {
261   std::lock_guard<std::recursive_mutex> Guard(Mutex);
262   for (auto &FI : Funcs) {
263     if (!Callback(FI))
264       break;
265   }
266 }
267 
268 void GsymCreator::forEachFunctionInfo(
269     std::function<bool(const FunctionInfo &)> const &Callback) const {
270   std::lock_guard<std::recursive_mutex> Guard(Mutex);
271   for (const auto &FI : Funcs) {
272     if (!Callback(FI))
273       break;
274   }
275 }
276