1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/BinaryFormat/Magic.h"
18 #include "llvm/Object/Archive.h"
19 #include "llvm/Object/COFF.h"
20 #include "llvm/Support/StringSaver.h"
21 #include <memory>
22 #include <set>
23 #include <vector>
24 
// Forward declarations of LLVM types that this header mentions by name only,
// so their defining headers do not have to be included here.
namespace llvm {
struct DILineInfo;

namespace pdb {
class DbiModuleDescriptorBuilder;
} // namespace pdb

namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
34 
35 namespace lld {
36 class DWARFCache;
37 
38 namespace coff {
39 
40 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
41 
42 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
43 using llvm::COFF::MachineTypes;
44 using llvm::object::Archive;
45 using llvm::object::COFFObjectFile;
46 using llvm::object::COFFSymbolRef;
47 using llvm::object::coff_import_header;
48 using llvm::object::coff_section;
49 
50 class Chunk;
51 class Defined;
52 class DefinedImportData;
53 class DefinedImportThunk;
54 class DefinedRegular;
55 class SectionChunk;
56 class Symbol;
57 class Undefined;
58 class TpiSource;
59 
60 // The root class of input files.
61 class InputFile {
62 public:
63   enum Kind {
64     ArchiveKind,
65     ObjectKind,
66     LazyObjectKind,
67     ImportKind,
68     BitcodeKind
69   };
70   Kind kind() const { return fileKind; }
71   virtual ~InputFile() {}
72 
73   // Returns the filename.
74   StringRef getName() const { return mb.getBufferIdentifier(); }
75 
76   // Reads a file (the constructor doesn't do that).
77   virtual void parse() = 0;
78 
79   // Returns the CPU type this file was compiled to.
80   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
81 
82   MemoryBufferRef mb;
83 
84   // An archive file name if this file is created from an archive.
85   StringRef parentName;
86 
87   // Returns .drectve section contents if exist.
88   StringRef getDirectives() { return directives; }
89 
90 protected:
91   InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
92 
93   StringRef directives;
94 
95 private:
96   const Kind fileKind;
97 };
98 
99 // .lib or .a file.
100 class ArchiveFile : public InputFile {
101 public:
102   explicit ArchiveFile(MemoryBufferRef m);
103   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
104   void parse() override;
105 
106   // Enqueues an archive member load for the given symbol. If we've already
107   // enqueued a load for the same archive member, this function does nothing,
108   // which ensures that we don't load the same member more than once.
109   void addMember(const Archive::Symbol &sym);
110 
111 private:
112   std::unique_ptr<Archive> file;
113   llvm::DenseSet<uint64_t> seen;
114 };
115 
116 // .obj or .o file between -start-lib and -end-lib.
117 class LazyObjFile : public InputFile {
118 public:
119   explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
120   static bool classof(const InputFile *f) {
121     return f->kind() == LazyObjectKind;
122   }
123   // Makes this object file part of the link.
124   void fetch();
125   // Adds the symbols in this file to the symbol table as LazyObject symbols.
126   void parse() override;
127 
128 private:
129   std::vector<Symbol *> symbols;
130 };
131 
132 // .obj or .o file. This may be a member of an archive file.
133 class ObjFile : public InputFile {
134 public:
135   explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
136   explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
137       : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
138   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
139   void parse() override;
140   MachineTypes getMachineType() override;
141   ArrayRef<Chunk *> getChunks() { return chunks; }
142   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
143   ArrayRef<SectionChunk *> getSXDataChunks() { return sXDataChunks; }
144   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
145   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
146   ArrayRef<Symbol *> getSymbols() { return symbols; }
147 
148   ArrayRef<uint8_t> getDebugSection(StringRef secName);
149 
150   // Returns a Symbol object for the symbolIndex'th symbol in the
151   // underlying object file.
152   Symbol *getSymbol(uint32_t symbolIndex) {
153     return symbols[symbolIndex];
154   }
155 
156   // Returns the underlying COFF file.
157   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
158 
159   // Add a symbol for a range extension thunk. Return the new symbol table
160   // index. This index can be used to modify a relocation.
161   uint32_t addRangeThunkSymbol(Symbol *thunk) {
162     symbols.push_back(thunk);
163     return symbols.size() - 1;
164   }
165 
166   void includeResourceChunks();
167 
168   bool isResourceObjFile() const { return !resourceChunks.empty(); }
169 
170   static std::vector<ObjFile *> instances;
171 
172   // Flags in the absolute @feat.00 symbol if it is present. These usually
173   // indicate if an object was compiled with certain security features enabled
174   // like stack guard, safeseh, /guard:cf, or other things.
175   uint32_t feat00Flags = 0;
176 
177   // True if this object file is compatible with SEH.  COFF-specific and
178   // x86-only. COFF spec 5.10.1. The .sxdata section.
179   bool hasSafeSEH() { return feat00Flags & 0x1; }
180 
181   // True if this file was compiled with /guard:cf.
182   bool hasGuardCF() { return feat00Flags & 0x800; }
183 
184   // Pointer to the PDB module descriptor builder. Various debug info records
185   // will reference object files by "module index", which is here. Things like
186   // source files and section contributions are also recorded here. Will be null
187   // if we are not producing a PDB.
188   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
189 
190   const coff_section *addrsigSec = nullptr;
191 
192   // When using Microsoft precompiled headers, this is the PCH's key.
193   // The same key is used by both the precompiled object, and objects using the
194   // precompiled object. Any difference indicates out-of-date objects.
195   llvm::Optional<uint32_t> pchSignature;
196 
197   // Whether this file was compiled with /hotpatch.
198   bool hotPatchable = false;
199 
200   // Whether the object was already merged into the final PDB.
201   bool mergedIntoPDB = false;
202 
203   // If the OBJ has a .debug$T stream, this tells how it will be handled.
204   TpiSource *debugTypesObj = nullptr;
205 
206   // The .debug$P or .debug$T section data if present. Empty otherwise.
207   ArrayRef<uint8_t> debugTypes;
208 
209   llvm::Optional<std::pair<StringRef, uint32_t>>
210   getVariableLocation(StringRef var);
211 
212   llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
213                                                  uint32_t sectionIndex);
214 
215 private:
216   const coff_section* getSection(uint32_t i);
217   const coff_section *getSection(COFFSymbolRef sym) {
218     return getSection(sym.getSectionNumber());
219   }
220 
221   void initializeChunks();
222   void initializeSymbols();
223   void initializeFlags();
224   void initializeDependencies();
225 
226   SectionChunk *
227   readSection(uint32_t sectionNumber,
228               const llvm::object::coff_aux_section_definition *def,
229               StringRef leaderName);
230 
231   void readAssociativeDefinition(
232       COFFSymbolRef coffSym,
233       const llvm::object::coff_aux_section_definition *def);
234 
235   void readAssociativeDefinition(
236       COFFSymbolRef coffSym,
237       const llvm::object::coff_aux_section_definition *def,
238       uint32_t parentSection);
239 
240   void recordPrevailingSymbolForMingw(
241       COFFSymbolRef coffSym,
242       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
243 
244   void maybeAssociateSEHForMingw(
245       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
246       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
247 
248   // Given a new symbol Sym with comdat selection Selection, if the new
249   // symbol is not (yet) Prevailing and the existing comdat leader set to
250   // Leader, emits a diagnostic if the new symbol and its selection doesn't
251   // match the existing symbol and its selection. If either old or new
252   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
253   // the existing leader. In that case, Prevailing is set to true.
254   void handleComdatSelection(COFFSymbolRef sym,
255                              llvm::COFF::COMDATType &selection,
256                              bool &prevailing, DefinedRegular *leader);
257 
258   llvm::Optional<Symbol *>
259   createDefined(COFFSymbolRef sym,
260                 std::vector<const llvm::object::coff_aux_section_definition *>
261                     &comdatDefs,
262                 bool &prevailingComdat);
263   Symbol *createRegular(COFFSymbolRef sym);
264   Symbol *createUndefined(COFFSymbolRef sym);
265 
266   std::unique_ptr<COFFObjectFile> coffObj;
267 
268   // List of all chunks defined by this file. This includes both section
269   // chunks and non-section chunks for common symbols.
270   std::vector<Chunk *> chunks;
271 
272   std::vector<SectionChunk *> resourceChunks;
273 
274   // CodeView debug info sections.
275   std::vector<SectionChunk *> debugChunks;
276 
277   // Chunks containing symbol table indices of exception handlers. Only used for
278   // 32-bit x86.
279   std::vector<SectionChunk *> sXDataChunks;
280 
281   // Chunks containing symbol table indices of address taken symbols and longjmp
282   // targets.  These are not linked into the final binary when /guard:cf is set.
283   std::vector<SectionChunk *> guardFidChunks;
284   std::vector<SectionChunk *> guardLJmpChunks;
285 
286   // This vector contains the same chunks as Chunks, but they are
287   // indexed such that you can get a SectionChunk by section index.
288   // Nonexistent section indices are filled with null pointers.
289   // (Because section number is 1-based, the first slot is always a
290   // null pointer.)
291   std::vector<SectionChunk *> sparseChunks;
292 
293   // This vector contains a list of all symbols defined or referenced by this
294   // file. They are indexed such that you can get a Symbol by symbol
295   // index. Nonexistent indices (which are occupied by auxiliary
296   // symbols in the real symbol table) are filled with null pointers.
297   std::vector<Symbol *> symbols;
298 
299   DWARFCache *dwarf = nullptr;
300 };
301 
302 // This type represents import library members that contain DLL names
303 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
304 // for details about the format.
305 class ImportFile : public InputFile {
306 public:
307   explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
308 
309   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
310 
311   static std::vector<ImportFile *> instances;
312 
313   Symbol *impSym = nullptr;
314   Symbol *thunkSym = nullptr;
315   std::string dllName;
316 
317 private:
318   void parse() override;
319 
320 public:
321   StringRef externalName;
322   const coff_import_header *hdr;
323   Chunk *location = nullptr;
324 
325   // We want to eliminate dllimported symbols if no one actually refers them.
326   // These "Live" bits are used to keep track of which import library members
327   // are actually in use.
328   //
329   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
330   // symbols provided by this import library member. We also track whether the
331   // imported symbol is used separately from whether the thunk is used in order
332   // to avoid creating unnecessary thunks.
333   bool live = !config->doGC;
334   bool thunkLive = !config->doGC;
335 };
336 
337 // Used for LTO.
338 class BitcodeFile : public InputFile {
339 public:
340   BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
341               uint64_t offsetInArchive);
342   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
343                        uint64_t offsetInArchive,
344                        std::vector<Symbol *> &&symbols);
345   ~BitcodeFile();
346   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
347   ArrayRef<Symbol *> getSymbols() { return symbols; }
348   MachineTypes getMachineType() override;
349   static std::vector<BitcodeFile *> instances;
350   std::unique_ptr<llvm::lto::InputFile> obj;
351 
352 private:
353   void parse() override;
354 
355   std::vector<Symbol *> symbols;
356 };
357 
358 inline bool isBitcode(MemoryBufferRef mb) {
359   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
360 }
361 
362 std::string replaceThinLTOSuffix(StringRef path);
363 } // namespace coff
364 
365 std::string toString(const coff::InputFile *file);
366 } // namespace lld
367 
368 #endif
369