1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_INPUT_FILES_H
10 #define LLD_ELF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "Symbols.h"
14 #include "lld/Common/ErrorHandler.h"
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Reproduce.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/BinaryFormat/Magic.h"
19 #include "llvm/Object/ELF.h"
20 #include "llvm/Support/MemoryBufferRef.h"
21 #include "llvm/Support/Threading.h"
22 
23 namespace llvm {
24 struct DILineInfo;
25 class TarWriter;
26 namespace lto {
27 class InputFile;
28 }
29 } // namespace llvm
30 
31 namespace lld {
32 class DWARFCache;
33 
34 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
35 std::string toString(const elf::InputFile *f);
36 
37 namespace elf {
38 
39 class InputSection;
40 class Symbol;
41 
// If --reproduce is specified, all input files are written to this tar archive.
// Defined elsewhere; this header only declares it.
extern std::unique_ptr<llvm::TarWriter> tar;

// Opens a given file.
// Returns an Optional, so callers must handle the "no buffer" case.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

// Add symbols in File to the symbol table.
void parseFile(InputFile *file);
50 
51 // The root class of input files.
52 class InputFile {
53 protected:
54   SmallVector<Symbol *, 0> symbols;
55   SmallVector<InputSectionBase *, 0> sections;
56 
57 public:
58   enum Kind : uint8_t {
59     ObjKind,
60     SharedKind,
61     ArchiveKind,
62     BitcodeKind,
63     BinaryKind,
64   };
65 
66   Kind kind() const { return fileKind; }
67 
68   bool isElf() const {
69     Kind k = kind();
70     return k == ObjKind || k == SharedKind;
71   }
72 
73   StringRef getName() const { return mb.getBufferIdentifier(); }
74   MemoryBufferRef mb;
75 
76   // Returns sections. It is a runtime error to call this function
77   // on files that don't have the notion of sections.
78   ArrayRef<InputSectionBase *> getSections() const {
79     assert(fileKind == ObjKind || fileKind == BinaryKind);
80     return sections;
81   }
82 
83   // Returns object file symbols. It is a runtime error to call this
84   // function on files of other types.
85   ArrayRef<Symbol *> getSymbols() const {
86     assert(fileKind == BinaryKind || fileKind == ObjKind ||
87            fileKind == BitcodeKind);
88     return symbols;
89   }
90 
91   // Get filename to use for linker script processing.
92   StringRef getNameForScript() const;
93 
94   // Check if a non-common symbol should be extracted to override a common
95   // definition.
96   bool shouldExtractForCommon(StringRef name);
97 
98   // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute
99   // offsets in PLT call stubs.
100   InputSection *ppc32Got2 = nullptr;
101 
102   // Index of MIPS GOT built for this file.
103   uint32_t mipsGotIndex = -1;
104 
105   // groupId is used for --warn-backrefs which is an optional error
106   // checking feature. All files within the same --{start,end}-group or
107   // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
108   // group ID. For more info, see checkDependency() in SymbolTable.cpp.
109   uint32_t groupId;
110   static bool isInGroup;
111   static uint32_t nextGroupId;
112 
113   // If this is an architecture-specific file, the following members
114   // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
115   uint16_t emachine = llvm::ELF::EM_NONE;
116   const Kind fileKind;
117   ELFKind ekind = ELFNoneKind;
118   uint8_t osabi = 0;
119   uint8_t abiVersion = 0;
120 
121   // True if this is a relocatable object file/bitcode file between --start-lib
122   // and --end-lib.
123   bool lazy = false;
124 
125   // True if this is an argument for --just-symbols. Usually false.
126   bool justSymbols = false;
127 
128   std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec,
129                         uint64_t offset);
130 
131   // On PPC64 we need to keep track of which files contain small code model
132   // relocations that access the .toc section. To minimize the chance of a
133   // relocation overflow, files that do contain said relocations should have
134   // their .toc sections sorted closer to the .got section than files that do
135   // not contain any small code model relocations. Thats because the toc-pointer
136   // is defined to point at .got + 0x8000 and the instructions used with small
137   // code model relocations support immediates in the range [-0x8000, 0x7FFC],
138   // making the addressable range relative to the toc pointer
139   // [.got, .got + 0xFFFC].
140   bool ppc64SmallCodeModelTocRelocs = false;
141 
142   // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or
143   // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation.
144   bool ppc64DisableTLSRelax = false;
145 
146 protected:
147   InputFile(Kind k, MemoryBufferRef m);
148 
149 public:
150   // If not empty, this stores the name of the archive containing this file.
151   // We use this string for creating error messages.
152   SmallString<0> archiveName;
153   // Cache for toString(). Only toString() should use this member.
154   mutable SmallString<0> toStringCache;
155 
156 private:
157   // Cache for getNameForScript().
158   mutable SmallString<0> nameForScriptCache;
159 };
160 
161 class ELFFileBase : public InputFile {
162 public:
163   ELFFileBase(Kind k, MemoryBufferRef m);
164   static bool classof(const InputFile *f) { return f->isElf(); }
165 
166   template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const {
167     return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer()));
168   }
169 
170   StringRef getStringTable() const { return stringTable; }
171 
172   ArrayRef<Symbol *> getLocalSymbols() {
173     if (symbols.empty())
174       return {};
175     return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1);
176   }
177   ArrayRef<Symbol *> getGlobalSymbols() {
178     return llvm::makeArrayRef(symbols).slice(firstGlobal);
179   }
180   MutableArrayRef<Symbol *> getMutableGlobalSymbols() {
181     return llvm::makeMutableArrayRef(symbols.data(), symbols.size())
182         .slice(firstGlobal);
183   }
184 
185   template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const {
186     return typename ELFT::ShdrRange(
187         reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs);
188   }
189   template <typename ELFT> typename ELFT::SymRange getELFSyms() const {
190     return typename ELFT::SymRange(
191         reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms);
192   }
193   template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const {
194     return getELFSyms<ELFT>().slice(firstGlobal);
195   }
196 
197 protected:
198   // Initializes this class's member variables.
199   template <typename ELFT> void init();
200 
201   StringRef stringTable;
202   const void *elfShdrs = nullptr;
203   const void *elfSyms = nullptr;
204   uint32_t numELFShdrs = 0;
205   uint32_t numELFSyms = 0;
206   uint32_t firstGlobal = 0;
207 
208 public:
209   uint32_t andFeatures = 0;
210   bool hasCommonSyms = false;
211 };
212 
213 // .o file.
214 template <class ELFT> class ObjFile : public ELFFileBase {
215   LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
216 
217 public:
218   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
219 
220   llvm::object::ELFFile<ELFT> getObj() const {
221     return this->ELFFileBase::getObj<ELFT>();
222   }
223 
224   ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) {
225     this->archiveName = archiveName;
226   }
227 
228   void parse(bool ignoreComdats = false);
229   void parseLazy();
230 
231   StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
232                                  const Elf_Shdr &sec);
233 
234   Symbol &getSymbol(uint32_t symbolIndex) const {
235     if (symbolIndex >= this->symbols.size())
236       fatal(toString(this) + ": invalid symbol index");
237     return *this->symbols[symbolIndex];
238   }
239 
240   uint32_t getSectionIndex(const Elf_Sym &sym) const;
241 
242   template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const {
243     uint32_t symIndex = rel.getSymbol(config->isMips64EL);
244     return getSymbol(symIndex);
245   }
246 
247   llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
248   llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name);
249 
250   // Name of source file obtained from STT_FILE symbol value,
251   // or empty string if there is no such symbol in object file
252   // symbol table.
253   StringRef sourceFile;
254 
255   // Pointer to this input file's .llvm_addrsig section, if it has one.
256   const Elf_Shdr *addrsigSec = nullptr;
257 
258   // SHT_LLVM_CALL_GRAPH_PROFILE section index.
259   uint32_t cgProfileSectionIndex = 0;
260 
261   // MIPS GP0 value defined by this file. This value represents the gp value
262   // used to create the relocatable object and required to support
263   // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
264   uint32_t mipsGp0 = 0;
265 
266   // True if the file defines functions compiled with
267   // -fsplit-stack. Usually false.
268   bool splitStack = false;
269 
270   // True if the file defines functions compiled with -fsplit-stack,
271   // but had one or more functions with the no_split_stack attribute.
272   bool someNoSplitStack = false;
273 
274   // Get cached DWARF information.
275   DWARFCache *getDwarf();
276 
277   void initializeLocalSymbols();
278   void postParse();
279 
280 private:
281   void initializeSections(bool ignoreComdats,
282                           const llvm::object::ELFFile<ELFT> &obj);
283   void initializeSymbols(const llvm::object::ELFFile<ELFT> &obj);
284   void initializeJustSymbols();
285 
286   InputSectionBase *getRelocTarget(uint32_t idx, const Elf_Shdr &sec,
287                                    uint32_t info);
288   InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec,
289                                        StringRef name);
290 
291   bool shouldMerge(const Elf_Shdr &sec, StringRef name);
292 
293   // Each ELF symbol contains a section index which the symbol belongs to.
294   // However, because the number of bits dedicated for that is limited, a
295   // symbol can directly point to a section only when the section index is
296   // equal to or smaller than 65280.
297   //
298   // If an object file contains more than 65280 sections, the file must
299   // contain .symtab_shndx section. The section contains an array of
300   // 32-bit integers whose size is the same as the number of symbols.
301   // Nth symbol's section index is in the Nth entry of .symtab_shndx.
302   //
303   // The following variable contains the contents of .symtab_shndx.
304   // If the section does not exist (which is common), the array is empty.
305   ArrayRef<Elf_Word> shndxTable;
306 
307   // Storage for local symbols.
308   std::unique_ptr<SymbolUnion[]> localSymStorage;
309 
310   // Debugging information to retrieve source file and line for error
311   // reporting. Linker may find reasonable number of errors in a
312   // single object file, so we cache debugging information in order to
313   // parse it only once for each object file we link.
314   std::unique_ptr<DWARFCache> dwarf;
315   llvm::once_flag initDwarf;
316 };
317 
318 class BitcodeFile : public InputFile {
319 public:
320   BitcodeFile(MemoryBufferRef m, StringRef archiveName,
321               uint64_t offsetInArchive, bool lazy);
322   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
323   template <class ELFT> void parse();
324   void parseLazy();
325   void postParse();
326   std::unique_ptr<llvm::lto::InputFile> obj;
327   std::vector<bool> keptComdats;
328 };
329 
330 // .so file.
331 class SharedFile : public ELFFileBase {
332 public:
333   SharedFile(MemoryBufferRef m, StringRef defaultSoName)
334       : ELFFileBase(SharedKind, m), soName(defaultSoName),
335         isNeeded(!config->asNeeded) {}
336 
337   // This is actually a vector of Elf_Verdef pointers.
338   SmallVector<const void *, 0> verdefs;
339 
340   // If the output file needs Elf_Verneed data structures for this file, this is
341   // a vector of Elf_Vernaux version identifiers that map onto the entries in
342   // Verdefs, otherwise it is empty.
343   SmallVector<uint32_t, 0> vernauxs;
344 
345   static unsigned vernauxNum;
346 
347   SmallVector<StringRef, 0> dtNeeded;
348   StringRef soName;
349 
350   static bool classof(const InputFile *f) { return f->kind() == SharedKind; }
351 
352   template <typename ELFT> void parse();
353 
354   // Used for --as-needed
355   bool isNeeded;
356 
357   // Non-weak undefined symbols which are not yet resolved when the SO is
358   // parsed. Only filled for `--no-allow-shlib-undefined`.
359   SmallVector<Symbol *, 0> requiredSymbols;
360 
361 private:
362   template <typename ELFT>
363   std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj,
364                                      const typename ELFT::Shdr *sec);
365 };
366 
367 class BinaryFile : public InputFile {
368 public:
369   explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {}
370   static bool classof(const InputFile *f) { return f->kind() == BinaryKind; }
371   void parse();
372 };
373 
// Creates an input file object for the buffer `mb` and returns it as an
// ELFFileBase pointer. `archiveName` defaults to the empty string and `lazy`
// defaults to false.
ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "",
                           bool lazy = false);

// Returns a path string derived from `path`.
// NOTE(review): the name suggests a ThinLTO object-suffix substitution --
// confirm the exact rule against the definition.
std::string replaceThinLTOSuffix(StringRef path);
378 
379 } // namespace elf
380 } // namespace lld
381 
382 #endif
383