1 //===- ELFObjHandler.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
//===----------------------------------------------------------------------===//
8 
9 #include "llvm/InterfaceStub/ELFObjHandler.h"
10 #include "llvm/InterfaceStub/ELFStub.h"
11 #include "llvm/Object/Binary.h"
12 #include "llvm/Object/ELFObjectFile.h"
13 #include "llvm/Object/ELFTypes.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 
18 using llvm::MemoryBufferRef;
19 using llvm::object::ELFObjectFile;
20 
21 using namespace llvm;
22 using namespace llvm::object;
23 using namespace llvm::ELF;
24 
25 namespace llvm {
26 namespace elfabi {
27 
// Simple struct to hold relevant .dynamic entries.
// It is filled in by populateDynamic(), which copies the raw d_un values of
// the tags named below and leaves every other field at its default.
struct DynamicEntries {
  // Value of the DT_STRTAB entry: address of the dynamic string table.
  uint64_t StrTabAddr = 0;
  // Value of the DT_STRSZ entry: size of the dynamic string table.
  uint64_t StrSize = 0;
  // Value of the DT_SONAME entry, if any. populateDynamic() checks that it is
  // smaller than StrSize, i.e. it is used as an offset into the string table.
  Optional<uint64_t> SONameOffset;
  // Value of every DT_NEEDED entry, in the order the entries were visited.
  // Each one is likewise checked against StrSize as a string table offset.
  std::vector<uint64_t> NeededLibNames;
  // Symbol table:
  // Value of the DT_SYMTAB entry: address of the dynamic symbol table.
  uint64_t DynSymAddr = 0;
  // Hash tables:
  // Value of the DT_HASH entry, if any.
  Optional<uint64_t> ElfHash;
  // Value of the DT_GNU_HASH entry, if any.
  Optional<uint64_t> GnuHash;
};
40 
41 /// This function behaves similarly to StringRef::substr(), but attempts to
42 /// terminate the returned StringRef at the first null terminator. If no null
43 /// terminator is found, an error is returned.
44 ///
45 /// @param Str Source string to create a substring from.
46 /// @param Offset The start index of the desired substring.
terminatedSubstr(StringRef Str,size_t Offset)47 static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) {
48   size_t StrEnd = Str.find('\0', Offset);
49   if (StrEnd == StringLiteral::npos) {
50     return createError(
51         "String overran bounds of string table (no null terminator)");
52   }
53 
54   size_t StrLen = StrEnd - Offset;
55   return Str.substr(Offset, StrLen);
56 }
57 
58 /// This function takes an error, and appends a string of text to the end of
59 /// that error. Since "appending" to an Error isn't supported behavior of an
60 /// Error, this function technically creates a new error with the combined
61 /// message and consumes the old error.
62 ///
63 /// @param Err Source error.
64 /// @param After Text to append at the end of Err's error message.
appendToError(Error Err,StringRef After)65 Error appendToError(Error Err, StringRef After) {
66   std::string Message;
67   raw_string_ostream Stream(Message);
68   Stream << Err;
69   Stream << " " << After;
70   consumeError(std::move(Err));
71   return createError(Stream.str().c_str());
72 }
73 
74 /// This function populates a DynamicEntries struct using an ELFT::DynRange.
75 /// After populating the struct, the members are validated with
76 /// some basic sanity checks.
77 ///
78 /// @param Dyn Target DynamicEntries struct to populate.
79 /// @param DynTable Source dynamic table.
80 template <class ELFT>
populateDynamic(DynamicEntries & Dyn,typename ELFT::DynRange DynTable)81 static Error populateDynamic(DynamicEntries &Dyn,
82                              typename ELFT::DynRange DynTable) {
83   if (DynTable.empty())
84     return createError("No .dynamic section found");
85 
86   // Search .dynamic for relevant entries.
87   bool FoundDynStr = false;
88   bool FoundDynStrSz = false;
89   bool FoundDynSym = false;
90   for (auto &Entry : DynTable) {
91     switch (Entry.d_tag) {
92     case DT_SONAME:
93       Dyn.SONameOffset = Entry.d_un.d_val;
94       break;
95     case DT_STRTAB:
96       Dyn.StrTabAddr = Entry.d_un.d_ptr;
97       FoundDynStr = true;
98       break;
99     case DT_STRSZ:
100       Dyn.StrSize = Entry.d_un.d_val;
101       FoundDynStrSz = true;
102       break;
103     case DT_NEEDED:
104       Dyn.NeededLibNames.push_back(Entry.d_un.d_val);
105       break;
106     case DT_SYMTAB:
107       Dyn.DynSymAddr = Entry.d_un.d_ptr;
108       FoundDynSym = true;
109       break;
110     case DT_HASH:
111       Dyn.ElfHash = Entry.d_un.d_ptr;
112       break;
113     case DT_GNU_HASH:
114       Dyn.GnuHash = Entry.d_un.d_ptr;
115     }
116   }
117 
118   if (!FoundDynStr) {
119     return createError(
120         "Couldn't locate dynamic string table (no DT_STRTAB entry)");
121   }
122   if (!FoundDynStrSz) {
123     return createError(
124         "Couldn't determine dynamic string table size (no DT_STRSZ entry)");
125   }
126   if (!FoundDynSym) {
127     return createError(
128         "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");
129   }
130   if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
131     return createStringError(object_error::parse_failed,
132                              "DT_SONAME string offset (0x%016" PRIx64
133                              ") outside of dynamic string table",
134                              *Dyn.SONameOffset);
135   }
136   for (uint64_t Offset : Dyn.NeededLibNames) {
137     if (Offset >= Dyn.StrSize) {
138       return createStringError(object_error::parse_failed,
139                                "DT_NEEDED string offset (0x%016" PRIx64
140                                ") outside of dynamic string table",
141                                Offset);
142     }
143   }
144 
145   return Error::success();
146 }
147 
/// This function finds the number of dynamic symbols using a GNU hash table.
///
/// The largest bucket value is taken as the index of the first symbol in the
/// last chain; the chain words are then walked until one with its low bit set
/// is found, which marks the last symbol.
///
/// NOTE(review): the chain walk is not bounded by the end of the file buffer,
/// so a table without a terminating chain word is still read past its end.
/// Bounding it would require passing the buffer end into this function.
///
/// @param Table The GNU hash table for .dynsym.
/// @return The number of entries in .dynsym as implied by the hash table.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // Find the index of the first symbol in the last chain.
  uint64_t LastSymIdx = 0;
  for (Elf_Word Val : Table.buckets())
    LastSymIdx = std::max(LastSymIdx, static_cast<uint64_t>(Val));

  // If no bucket refers to a symbol at or above symndx (e.g. every bucket is
  // empty), there is no chain to walk. Asking values() for fewer than symndx
  // symbols would presumably make its unsigned length computation wrap and
  // yield a wild pointer (verify against ELFTypes.h), so stop here: only the
  // symndx symbols that precede the hashed ones exist.
  if (LastSymIdx < Table.symndx)
    return Table.symndx;

  // end() of the chain values covering symbols [symndx, LastSymIdx) is the
  // chain word that belongs to symbol LastSymIdx itself.
  const Elf_Word *It =
      reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end());
  // Locate the end of the chain to find the last symbol index.
  while ((*It & 1) == 0) {
    ++LastSymIdx;
    ++It;
  }
  return LastSymIdx + 1;
}
172 
173 /// This function determines the number of dynamic symbols.
174 /// Without access to section headers, the number of symbols must be determined
175 /// by parsing dynamic hash tables.
176 ///
177 /// @param Dyn Entries with the locations of hash tables.
178 /// @param ElfFile The ElfFile that the section contents reside in.
179 template <class ELFT>
getNumSyms(DynamicEntries & Dyn,const ELFFile<ELFT> & ElfFile)180 static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
181                                      const ELFFile<ELFT> &ElfFile) {
182   using Elf_Hash = typename ELFT::Hash;
183   using Elf_GnuHash = typename ELFT::GnuHash;
184   // Search GNU hash table to try to find the upper bound of dynsym.
185   if (Dyn.GnuHash.hasValue()) {
186     Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash);
187     if (!TablePtr)
188       return TablePtr.takeError();
189     const Elf_GnuHash *Table =
190         reinterpret_cast<const Elf_GnuHash *>(TablePtr.get());
191     return getDynSymtabSize<ELFT>(*Table);
192   }
193   // Search SYSV hash table to try to find the upper bound of dynsym.
194   if (Dyn.ElfHash.hasValue()) {
195     Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash);
196     if (!TablePtr)
197       return TablePtr.takeError();
198     const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get());
199     return Table->nchain;
200   }
201   return 0;
202 }
203 
204 /// This function extracts symbol type from a symbol's st_info member and
205 /// maps it to an ELFSymbolType enum.
206 /// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported.
207 /// Other symbol types are mapped to ELFSymbolType::Unknown.
208 ///
209 /// @param Info Binary symbol st_info to extract symbol type from.
convertInfoToType(uint8_t Info)210 static ELFSymbolType convertInfoToType(uint8_t Info) {
211   Info = Info & 0xf;
212   switch (Info) {
213   case ELF::STT_NOTYPE:
214     return ELFSymbolType::NoType;
215   case ELF::STT_OBJECT:
216     return ELFSymbolType::Object;
217   case ELF::STT_FUNC:
218     return ELFSymbolType::Func;
219   case ELF::STT_TLS:
220     return ELFSymbolType::TLS;
221   default:
222     return ELFSymbolType::Unknown;
223   }
224 }
225 
226 /// This function creates an ELFSymbol and populates all members using
227 /// information from a binary ELFT::Sym.
228 ///
229 /// @param SymName The desired name of the ELFSymbol.
230 /// @param RawSym ELFT::Sym to extract symbol information from.
231 template <class ELFT>
createELFSym(StringRef SymName,const typename ELFT::Sym & RawSym)232 static ELFSymbol createELFSym(StringRef SymName,
233                               const typename ELFT::Sym &RawSym) {
234   ELFSymbol TargetSym{std::string(SymName)};
235   uint8_t Binding = RawSym.getBinding();
236   if (Binding == STB_WEAK)
237     TargetSym.Weak = true;
238   else
239     TargetSym.Weak = false;
240 
241   TargetSym.Undefined = RawSym.isUndefined();
242   TargetSym.Type = convertInfoToType(RawSym.st_info);
243 
244   if (TargetSym.Type == ELFSymbolType::Func) {
245     TargetSym.Size = 0;
246   } else {
247     TargetSym.Size = RawSym.st_size;
248   }
249   return TargetSym;
250 }
251 
252 /// This function populates an ELFStub with symbols using information read
253 /// from an ELF binary.
254 ///
255 /// @param TargetStub ELFStub to add symbols to.
256 /// @param DynSym Range of dynamic symbols to add to TargetStub.
257 /// @param DynStr StringRef to the dynamic string table.
258 template <class ELFT>
populateSymbols(ELFStub & TargetStub,const typename ELFT::SymRange DynSym,StringRef DynStr)259 static Error populateSymbols(ELFStub &TargetStub,
260                              const typename ELFT::SymRange DynSym,
261                              StringRef DynStr) {
262   // Skips the first symbol since it's the NULL symbol.
263   for (auto RawSym : DynSym.drop_front(1)) {
264     // If a symbol does not have global or weak binding, ignore it.
265     uint8_t Binding = RawSym.getBinding();
266     if (!(Binding == STB_GLOBAL || Binding == STB_WEAK))
267       continue;
268     // If a symbol doesn't have default or protected visibility, ignore it.
269     uint8_t Visibility = RawSym.getVisibility();
270     if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED))
271       continue;
272     // Create an ELFSymbol and populate it with information from the symbol
273     // table entry.
274     Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name);
275     if (!SymName)
276       return SymName.takeError();
277     ELFSymbol Sym = createELFSym<ELFT>(*SymName, RawSym);
278     TargetStub.Symbols.insert(std::move(Sym));
279     // TODO: Populate symbol warning.
280   }
281   return Error::success();
282 }
283 
284 /// Returns a new ELFStub with all members populated from an ELFObjectFile.
285 /// @param ElfObj Source ELFObjectFile.
286 template <class ELFT>
287 static Expected<std::unique_ptr<ELFStub>>
buildStub(const ELFObjectFile<ELFT> & ElfObj)288 buildStub(const ELFObjectFile<ELFT> &ElfObj) {
289   using Elf_Dyn_Range = typename ELFT::DynRange;
290   using Elf_Phdr_Range = typename ELFT::PhdrRange;
291   using Elf_Sym_Range = typename ELFT::SymRange;
292   using Elf_Sym = typename ELFT::Sym;
293   std::unique_ptr<ELFStub> DestStub = std::make_unique<ELFStub>();
294   const ELFFile<ELFT> *ElfFile = ElfObj.getELFFile();
295   // Fetch .dynamic table.
296   Expected<Elf_Dyn_Range> DynTable = ElfFile->dynamicEntries();
297   if (!DynTable) {
298     return DynTable.takeError();
299   }
300 
301   // Fetch program headers.
302   Expected<Elf_Phdr_Range> PHdrs = ElfFile->program_headers();
303   if (!PHdrs) {
304     return PHdrs.takeError();
305   }
306 
307   // Collect relevant .dynamic entries.
308   DynamicEntries DynEnt;
309   if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable))
310     return std::move(Err);
311 
312   // Get pointer to in-memory location of .dynstr section.
313   Expected<const uint8_t *> DynStrPtr =
314       ElfFile->toMappedAddr(DynEnt.StrTabAddr);
315   if (!DynStrPtr)
316     return appendToError(DynStrPtr.takeError(),
317                          "when locating .dynstr section contents");
318 
319   StringRef DynStr(reinterpret_cast<const char *>(DynStrPtr.get()),
320                    DynEnt.StrSize);
321 
322   // Populate Arch from ELF header.
323   DestStub->Arch = ElfFile->getHeader().e_machine;
324 
325   // Populate SoName from .dynamic entries and dynamic string table.
326   if (DynEnt.SONameOffset.hasValue()) {
327     Expected<StringRef> NameOrErr =
328         terminatedSubstr(DynStr, *DynEnt.SONameOffset);
329     if (!NameOrErr) {
330       return appendToError(NameOrErr.takeError(), "when reading DT_SONAME");
331     }
332     DestStub->SoName = std::string(*NameOrErr);
333   }
334 
335   // Populate NeededLibs from .dynamic entries and dynamic string table.
336   for (uint64_t NeededStrOffset : DynEnt.NeededLibNames) {
337     Expected<StringRef> LibNameOrErr =
338         terminatedSubstr(DynStr, NeededStrOffset);
339     if (!LibNameOrErr) {
340       return appendToError(LibNameOrErr.takeError(), "when reading DT_NEEDED");
341     }
342     DestStub->NeededLibs.push_back(std::string(*LibNameOrErr));
343   }
344 
345   // Populate Symbols from .dynsym table and dynamic string table.
346   Expected<uint64_t> SymCount = getNumSyms(DynEnt, *ElfFile);
347   if (!SymCount)
348     return SymCount.takeError();
349   if (*SymCount > 0) {
350     // Get pointer to in-memory location of .dynsym section.
351     Expected<const uint8_t *> DynSymPtr =
352         ElfFile->toMappedAddr(DynEnt.DynSymAddr);
353     if (!DynSymPtr)
354       return appendToError(DynSymPtr.takeError(),
355                            "when locating .dynsym section contents");
356     Elf_Sym_Range DynSyms = ArrayRef<Elf_Sym>(
357         reinterpret_cast<const Elf_Sym *>(*DynSymPtr), *SymCount);
358     Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr);
359     if (SymReadError)
360       return appendToError(std::move(SymReadError),
361                            "when reading dynamic symbols");
362   }
363 
364   return std::move(DestStub);
365 }
366 
readELFFile(MemoryBufferRef Buf)367 Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf) {
368   Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(Buf);
369   if (!BinOrErr) {
370     return BinOrErr.takeError();
371   }
372 
373   Binary *Bin = BinOrErr->get();
374   if (auto Obj = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
375     return buildStub(*Obj);
376   } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
377     return buildStub(*Obj);
378   } else if (auto Obj = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
379     return buildStub(*Obj);
380   } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
381     return buildStub(*Obj);
382   }
383 
384   return createStringError(errc::not_supported, "Unsupported binary format");
385 }
386 
387 } // end namespace elfabi
388 } // end namespace llvm
389