1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <utility>
#include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
23 
24 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
25   insertFile(StringRef());
26 }
27 
28 uint32_t GsymCreator::insertFile(StringRef Path,
29                                  llvm::sys::path::Style Style) {
30   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
31   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
32   // We must insert the strings first, then call the FileEntry constructor.
33   // If we inline the insertString() function call into the constructor, the
34   // call order is undefined due to parameter lists not having any ordering
35   // requirements.
36   const uint32_t Dir = insertString(directory);
37   const uint32_t Base = insertString(filename);
38   FileEntry FE(Dir, Base);
39 
40   std::lock_guard<std::recursive_mutex> Guard(Mutex);
41   const auto NextIndex = Files.size();
42   // Find FE in hash map and insert if not present.
43   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
44   if (R.second)
45     Files.emplace_back(FE);
46   return R.first->second;
47 }
48 
49 llvm::Error GsymCreator::save(StringRef Path,
50                               llvm::support::endianness ByteOrder) const {
51   std::error_code EC;
52   raw_fd_ostream OutStrm(Path, EC);
53   if (EC)
54     return llvm::errorCodeToError(EC);
55   FileWriter O(OutStrm, ByteOrder);
56   return encode(O);
57 }
58 
59 llvm::Error GsymCreator::encode(FileWriter &O) const {
60   std::lock_guard<std::recursive_mutex> Guard(Mutex);
61   if (Funcs.empty())
62     return createStringError(std::errc::invalid_argument,
63                              "no functions to encode");
64   if (!Finalized)
65     return createStringError(std::errc::invalid_argument,
66                              "GsymCreator wasn't finalized prior to encoding");
67 
68   if (Funcs.size() > UINT32_MAX)
69     return createStringError(std::errc::invalid_argument,
70                              "too many FunctionInfos");
71 
72   const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73   const uint64_t MaxAddr = Funcs.back().startAddress();
74   const uint64_t AddrDelta = MaxAddr - MinAddr;
75   Header Hdr;
76   Hdr.Magic = GSYM_MAGIC;
77   Hdr.Version = GSYM_VERSION;
78   Hdr.AddrOffSize = 0;
79   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80   Hdr.BaseAddress = MinAddr;
81   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82   Hdr.StrtabOffset = 0; // We will fix this up later.
83   Hdr.StrtabSize = 0; // We will fix this up later.
84   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85   if (UUID.size() > sizeof(Hdr.UUID))
86     return createStringError(std::errc::invalid_argument,
87                              "invalid UUID size %u", (uint32_t)UUID.size());
88   // Set the address offset size correctly in the GSYM header.
89   if (AddrDelta <= UINT8_MAX)
90     Hdr.AddrOffSize = 1;
91   else if (AddrDelta <= UINT16_MAX)
92     Hdr.AddrOffSize = 2;
93   else if (AddrDelta <= UINT32_MAX)
94     Hdr.AddrOffSize = 4;
95   else
96     Hdr.AddrOffSize = 8;
97   // Copy the UUID value if we have one.
98   if (UUID.size() > 0)
99     memcpy(Hdr.UUID, UUID.data(), UUID.size());
100   // Write out the header.
101   llvm::Error Err = Hdr.encode(O);
102   if (Err)
103     return Err;
104 
105   // Write out the address offsets.
106   O.alignTo(Hdr.AddrOffSize);
107   for (const auto &FuncInfo : Funcs) {
108     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109     switch(Hdr.AddrOffSize) {
110       case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
111       case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
112       case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
113       case 8: O.writeU64(AddrOffset); break;
114     }
115   }
116 
117   // Write out all zeros for the AddrInfoOffsets.
118   O.alignTo(4);
119   const off_t AddrInfoOffsetsOffset = O.tell();
120   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
121     O.writeU32(0);
122 
123   // Write out the file table
124   O.alignTo(4);
125   assert(!Files.empty());
126   assert(Files[0].Dir == 0);
127   assert(Files[0].Base == 0);
128   size_t NumFiles = Files.size();
129   if (NumFiles > UINT32_MAX)
130     return createStringError(std::errc::invalid_argument,
131                              "too many files");
132   O.writeU32(static_cast<uint32_t>(NumFiles));
133   for (auto File: Files) {
134       O.writeU32(File.Dir);
135       O.writeU32(File.Base);
136   }
137 
138   // Write out the sting table.
139   const off_t StrtabOffset = O.tell();
140   StrTab.write(O.get_stream());
141   const off_t StrtabSize = O.tell() - StrtabOffset;
142   std::vector<uint32_t> AddrInfoOffsets;
143 
144   // Write out the address infos for each function info.
145   for (const auto &FuncInfo : Funcs) {
146     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
147         AddrInfoOffsets.push_back(OffsetOrErr.get());
148     else
149         return OffsetOrErr.takeError();
150   }
151   // Fixup the string table offset and size in the header
152   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
153   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
154 
155   // Fixup all address info offsets
156   uint64_t Offset = 0;
157   for (auto AddrInfoOffset: AddrInfoOffsets) {
158     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
159     Offset += 4;
160   }
161   return ErrorSuccess();
162 }
163 
164 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
165   std::lock_guard<std::recursive_mutex> Guard(Mutex);
166   if (Finalized)
167     return createStringError(std::errc::invalid_argument,
168                              "already finalized");
169   Finalized = true;
170 
171   // Sort function infos so we can emit sorted functions.
172   llvm::sort(Funcs);
173 
174   // Don't let the string table indexes change by finalizing in order.
175   StrTab.finalizeInOrder();
176 
177   // Remove duplicates function infos that have both entries from debug info
178   // (DWARF or Breakpad) and entries from the SymbolTable.
179   //
180   // Also handle overlapping function. Usually there shouldn't be any, but they
181   // can and do happen in some rare cases.
182   //
183   // (a)          (b)         (c)
184   //     ^  ^       ^            ^
185   //     |X |Y      |X ^         |X
186   //     |  |       |  |Y        |  ^
187   //     |  |       |  v         v  |Y
188   //     v  v       v               v
189   //
190   // In (a) and (b), Y is ignored and X will be reported for the full range.
191   // In (c), both functions will be included in the result and lookups for an
192   // address in the intersection will return Y because of binary search.
193   //
194   // Note that in case of (b), we cannot include Y in the result because then
195   // we wouldn't find any function for range (end of Y, end of X)
196   // with binary search
197   auto NumBefore = Funcs.size();
198   auto Curr = Funcs.begin();
199   auto Prev = Funcs.end();
200   while (Curr != Funcs.end()) {
201     // Can't check for overlaps or same address ranges if we don't have a
202     // previous entry
203     if (Prev != Funcs.end()) {
204       if (Prev->Range.intersects(Curr->Range)) {
205         // Overlapping address ranges.
206         if (Prev->Range == Curr->Range) {
207           // Same address range. Check if one is from debug info and the other
208           // is from a symbol table. If so, then keep the one with debug info.
209           // Our sorting guarantees that entries with matching address ranges
210           // that have debug info are last in the sort.
211           if (*Prev == *Curr) {
212             // FunctionInfo entries match exactly (range, lines, inlines)
213             OS << "warning: duplicate function info entries for range: "
214                << Curr->Range << '\n';
215             Curr = Funcs.erase(Prev);
216           } else {
217             if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
218               // Same address range, one with no debug info (symbol) and the
219               // next with debug info. Keep the latter.
220               Curr = Funcs.erase(Prev);
221             } else {
222               OS << "warning: same address range contains different debug "
223                  << "info. Removing:\n"
224                  << *Prev << "\nIn favor of this one:\n"
225                  << *Curr << "\n";
226               Curr = Funcs.erase(Prev);
227             }
228           }
229         } else {
230           // print warnings about overlaps
231           OS << "warning: function ranges overlap:\n"
232              << *Prev << "\n"
233              << *Curr << "\n";
234         }
235       } else if (Prev->Range.size() == 0 &&
236                  Curr->Range.contains(Prev->Range.Start)) {
237         OS << "warning: removing symbol:\n"
238            << *Prev << "\nKeeping:\n"
239            << *Curr << "\n";
240         Curr = Funcs.erase(Prev);
241       }
242     }
243     if (Curr == Funcs.end())
244       break;
245     Prev = Curr++;
246   }
247 
248   // If our last function info entry doesn't have a size and if we have valid
249   // text ranges, we should set the size of the last entry since any search for
250   // a high address might match our last entry. By fixing up this size, we can
251   // help ensure we don't cause lookups to always return the last symbol that
252   // has no size when doing lookups.
253   if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
254     if (auto Range = ValidTextRanges->getRangeThatContains(
255           Funcs.back().Range.Start)) {
256       Funcs.back().Range.End = Range->End;
257     }
258   }
259   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
260      << Funcs.size() << " total\n";
261   return Error::success();
262 }
263 
264 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
265   if (S.empty())
266     return 0;
267   std::lock_guard<std::recursive_mutex> Guard(Mutex);
268   if (Copy) {
269     // We need to provide backing storage for the string if requested
270     // since StringTableBuilder stores references to strings. Any string
271     // that comes from a section in an object file doesn't need to be
272     // copied, but any string created by code will need to be copied.
273     // This allows GsymCreator to be really fast when parsing DWARF and
274     // other object files as most strings don't need to be copied.
275     CachedHashStringRef CHStr(S);
276     if (!StrTab.contains(CHStr))
277       S = StringStorage.insert(S).first->getKey();
278   }
279   return StrTab.add(S);
280 }
281 
282 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
283   std::lock_guard<std::recursive_mutex> Guard(Mutex);
284   Ranges.insert(FI.Range);
285   Funcs.emplace_back(FI);
286 }
287 
288 void GsymCreator::forEachFunctionInfo(
289     std::function<bool(FunctionInfo &)> const &Callback) {
290   std::lock_guard<std::recursive_mutex> Guard(Mutex);
291   for (auto &FI : Funcs) {
292     if (!Callback(FI))
293       break;
294   }
295 }
296 
297 void GsymCreator::forEachFunctionInfo(
298     std::function<bool(const FunctionInfo &)> const &Callback) const {
299   std::lock_guard<std::recursive_mutex> Guard(Mutex);
300   for (const auto &FI : Funcs) {
301     if (!Callback(FI))
302       break;
303   }
304 }
305 
306 size_t GsymCreator::getNumFunctionInfos() const{
307   std::lock_guard<std::recursive_mutex> Guard(Mutex);
308   return Funcs.size();
309 }
310 
311 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
312   if (ValidTextRanges)
313     return ValidTextRanges->contains(Addr);
314   return true; // No valid text ranges has been set, so accept all ranges.
315 }
316 
317 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
318   std::lock_guard<std::recursive_mutex> Guard(Mutex);
319   return Ranges.contains(Addr);
320 }
321