1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <cassert>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
24 
// Forward declarations of LLVM types. This header only names them in
// declarations (return types, pointers, smart-pointer members), so the full
// definitions are not pulled in here.
namespace llvm {
struct DILineInfo;
namespace pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace pdb
namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
35 
36 namespace lld {
37 class DWARFCache;
38 
39 namespace coff {
40 
// Declaration only; the definition is not in this header. Takes an archive and
// returns a list of memory buffer references -- presumably one per archive
// member, judging by the name (confirm against the definition).
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
42 
// Bring the LLVM COFF/object names used by the declarations below into
// lld::coff so they can be written unqualified.
using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
using llvm::COFF::MachineTypes;
using llvm::object::Archive;
using llvm::object::COFFObjectFile;
using llvm::object::COFFSymbolRef;
using llvm::object::coff_import_header;
using llvm::object::coff_section;

// Forward declarations of linker classes that are named, but not defined, in
// this header.
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class DefinedRegular;
class SectionChunk;
class Symbol;
class Undefined;
class TpiSource;
60 
61 // The root class of input files.
62 class InputFile {
63 public:
64   enum Kind {
65     ArchiveKind,
66     ObjectKind,
67     LazyObjectKind,
68     PDBKind,
69     ImportKind,
70     BitcodeKind
71   };
72   Kind kind() const { return fileKind; }
73   virtual ~InputFile() {}
74 
75   // Returns the filename.
76   StringRef getName() const { return mb.getBufferIdentifier(); }
77 
78   // Reads a file (the constructor doesn't do that).
79   virtual void parse() = 0;
80 
81   // Returns the CPU type this file was compiled to.
82   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
83 
84   MemoryBufferRef mb;
85 
86   // An archive file name if this file is created from an archive.
87   StringRef parentName;
88 
89   // Returns .drectve section contents if exist.
90   StringRef getDirectives() { return directives; }
91 
92 protected:
93   InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
94 
95   StringRef directives;
96 
97 private:
98   const Kind fileKind;
99 };
100 
101 // .lib or .a file.
102 class ArchiveFile : public InputFile {
103 public:
104   explicit ArchiveFile(MemoryBufferRef m);
105   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
106   void parse() override;
107 
108   // Enqueues an archive member load for the given symbol. If we've already
109   // enqueued a load for the same archive member, this function does nothing,
110   // which ensures that we don't load the same member more than once.
111   void addMember(const Archive::Symbol &sym);
112 
113 private:
114   std::unique_ptr<Archive> file;
115   llvm::DenseSet<uint64_t> seen;
116 };
117 
118 // .obj or .o file between -start-lib and -end-lib.
119 class LazyObjFile : public InputFile {
120 public:
121   explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
122   static bool classof(const InputFile *f) {
123     return f->kind() == LazyObjectKind;
124   }
125   // Makes this object file part of the link.
126   void fetch();
127   // Adds the symbols in this file to the symbol table as LazyObject symbols.
128   void parse() override;
129 
130 private:
131   std::vector<Symbol *> symbols;
132 };
133 
134 // .obj or .o file. This may be a member of an archive file.
135 class ObjFile : public InputFile {
136 public:
137   explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
138   explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
139       : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
140   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
141   void parse() override;
142   MachineTypes getMachineType() override;
143   ArrayRef<Chunk *> getChunks() { return chunks; }
144   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
145   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
146   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
147   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
148   ArrayRef<Symbol *> getSymbols() { return symbols; }
149 
150   ArrayRef<uint8_t> getDebugSection(StringRef secName);
151 
152   // Returns a Symbol object for the symbolIndex'th symbol in the
153   // underlying object file.
154   Symbol *getSymbol(uint32_t symbolIndex) {
155     return symbols[symbolIndex];
156   }
157 
158   // Returns the underlying COFF file.
159   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
160 
161   // Add a symbol for a range extension thunk. Return the new symbol table
162   // index. This index can be used to modify a relocation.
163   uint32_t addRangeThunkSymbol(Symbol *thunk) {
164     symbols.push_back(thunk);
165     return symbols.size() - 1;
166   }
167 
168   void includeResourceChunks();
169 
170   bool isResourceObjFile() const { return !resourceChunks.empty(); }
171 
172   static std::vector<ObjFile *> instances;
173 
174   // Flags in the absolute @feat.00 symbol if it is present. These usually
175   // indicate if an object was compiled with certain security features enabled
176   // like stack guard, safeseh, /guard:cf, or other things.
177   uint32_t feat00Flags = 0;
178 
179   // True if this object file is compatible with SEH.  COFF-specific and
180   // x86-only. COFF spec 5.10.1. The .sxdata section.
181   bool hasSafeSEH() { return feat00Flags & 0x1; }
182 
183   // True if this file was compiled with /guard:cf.
184   bool hasGuardCF() { return feat00Flags & 0x800; }
185 
186   // Pointer to the PDB module descriptor builder. Various debug info records
187   // will reference object files by "module index", which is here. Things like
188   // source files and section contributions are also recorded here. Will be null
189   // if we are not producing a PDB.
190   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
191 
192   const coff_section *addrsigSec = nullptr;
193 
194   // When using Microsoft precompiled headers, this is the PCH's key.
195   // The same key is used by both the precompiled object, and objects using the
196   // precompiled object. Any difference indicates out-of-date objects.
197   llvm::Optional<uint32_t> pchSignature;
198 
199   // Whether this file was compiled with /hotpatch.
200   bool hotPatchable = false;
201 
202   // Whether the object was already merged into the final PDB.
203   bool mergedIntoPDB = false;
204 
205   // If the OBJ has a .debug$T stream, this tells how it will be handled.
206   TpiSource *debugTypesObj = nullptr;
207 
208   // The .debug$P or .debug$T section data if present. Empty otherwise.
209   ArrayRef<uint8_t> debugTypes;
210 
211   llvm::Optional<std::pair<StringRef, uint32_t>>
212   getVariableLocation(StringRef var);
213 
214   llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
215                                                  uint32_t sectionIndex);
216 
217 private:
218   const coff_section* getSection(uint32_t i);
219   const coff_section *getSection(COFFSymbolRef sym) {
220     return getSection(sym.getSectionNumber());
221   }
222 
223   void initializeChunks();
224   void initializeSymbols();
225   void initializeFlags();
226   void initializeDependencies();
227 
228   SectionChunk *
229   readSection(uint32_t sectionNumber,
230               const llvm::object::coff_aux_section_definition *def,
231               StringRef leaderName);
232 
233   void readAssociativeDefinition(
234       COFFSymbolRef coffSym,
235       const llvm::object::coff_aux_section_definition *def);
236 
237   void readAssociativeDefinition(
238       COFFSymbolRef coffSym,
239       const llvm::object::coff_aux_section_definition *def,
240       uint32_t parentSection);
241 
242   void recordPrevailingSymbolForMingw(
243       COFFSymbolRef coffSym,
244       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
245 
246   void maybeAssociateSEHForMingw(
247       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
248       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
249 
250   // Given a new symbol Sym with comdat selection Selection, if the new
251   // symbol is not (yet) Prevailing and the existing comdat leader set to
252   // Leader, emits a diagnostic if the new symbol and its selection doesn't
253   // match the existing symbol and its selection. If either old or new
254   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
255   // the existing leader. In that case, Prevailing is set to true.
256   void handleComdatSelection(COFFSymbolRef sym,
257                              llvm::COFF::COMDATType &selection,
258                              bool &prevailing, DefinedRegular *leader);
259 
260   llvm::Optional<Symbol *>
261   createDefined(COFFSymbolRef sym,
262                 std::vector<const llvm::object::coff_aux_section_definition *>
263                     &comdatDefs,
264                 bool &prevailingComdat);
265   Symbol *createRegular(COFFSymbolRef sym);
266   Symbol *createUndefined(COFFSymbolRef sym);
267 
268   std::unique_ptr<COFFObjectFile> coffObj;
269 
270   // List of all chunks defined by this file. This includes both section
271   // chunks and non-section chunks for common symbols.
272   std::vector<Chunk *> chunks;
273 
274   std::vector<SectionChunk *> resourceChunks;
275 
276   // CodeView debug info sections.
277   std::vector<SectionChunk *> debugChunks;
278 
279   // Chunks containing symbol table indices of exception handlers. Only used for
280   // 32-bit x86.
281   std::vector<SectionChunk *> sxDataChunks;
282 
283   // Chunks containing symbol table indices of address taken symbols and longjmp
284   // targets.  These are not linked into the final binary when /guard:cf is set.
285   std::vector<SectionChunk *> guardFidChunks;
286   std::vector<SectionChunk *> guardLJmpChunks;
287 
288   // This vector contains a list of all symbols defined or referenced by this
289   // file. They are indexed such that you can get a Symbol by symbol
290   // index. Nonexistent indices (which are occupied by auxiliary
291   // symbols in the real symbol table) are filled with null pointers.
292   std::vector<Symbol *> symbols;
293 
294   // This vector contains the same chunks as Chunks, but they are
295   // indexed such that you can get a SectionChunk by section index.
296   // Nonexistent section indices are filled with null pointers.
297   // (Because section number is 1-based, the first slot is always a
298   // null pointer.) This vector is only valid during initialization.
299   std::vector<SectionChunk *> sparseChunks;
300 
301   DWARFCache *dwarf = nullptr;
302 };
303 
304 // This is a PDB type server dependency, that is not a input file per se, but
305 // needs to be treated like one. Such files are discovered from the debug type
306 // stream.
307 class PDBInputFile : public InputFile {
308 public:
309   explicit PDBInputFile(MemoryBufferRef m);
310   ~PDBInputFile();
311   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
312   void parse() override;
313 
314   static void enqueue(StringRef path, ObjFile *fromFile);
315 
316   static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
317 
318   static std::map<std::string, PDBInputFile *> instances;
319 
320   // Record possible errors while opening the PDB file
321   llvm::Optional<Error> loadErr;
322 
323   // This is the actual interface to the PDB (if it was opened successfully)
324   std::unique_ptr<llvm::pdb::NativeSession> session;
325 
326   // If the PDB has a .debug$T stream, this tells how it will be handled.
327   TpiSource *debugTypesObj = nullptr;
328 };
329 
330 // This type represents import library members that contain DLL names
331 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
332 // for details about the format.
333 class ImportFile : public InputFile {
334 public:
335   explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
336 
337   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
338 
339   static std::vector<ImportFile *> instances;
340 
341   Symbol *impSym = nullptr;
342   Symbol *thunkSym = nullptr;
343   std::string dllName;
344 
345 private:
346   void parse() override;
347 
348 public:
349   StringRef externalName;
350   const coff_import_header *hdr;
351   Chunk *location = nullptr;
352 
353   // We want to eliminate dllimported symbols if no one actually refers them.
354   // These "Live" bits are used to keep track of which import library members
355   // are actually in use.
356   //
357   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
358   // symbols provided by this import library member. We also track whether the
359   // imported symbol is used separately from whether the thunk is used in order
360   // to avoid creating unnecessary thunks.
361   bool live = !config->doGC;
362   bool thunkLive = !config->doGC;
363 };
364 
365 // Used for LTO.
366 class BitcodeFile : public InputFile {
367 public:
368   BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
369               uint64_t offsetInArchive);
370   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
371                        uint64_t offsetInArchive,
372                        std::vector<Symbol *> &&symbols);
373   ~BitcodeFile();
374   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
375   ArrayRef<Symbol *> getSymbols() { return symbols; }
376   MachineTypes getMachineType() override;
377   static std::vector<BitcodeFile *> instances;
378   std::unique_ptr<llvm::lto::InputFile> obj;
379 
380 private:
381   void parse() override;
382 
383   std::vector<Symbol *> symbols;
384 };
385 
386 inline bool isBitcode(MemoryBufferRef mb) {
387   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
388 }
389 
// Declaration only; the definition is not in this header. Takes a path and
// returns a new string -- presumably the path with a ThinLTO-related suffix
// substituted, judging by the name (confirm against the definition).
std::string replaceThinLTOSuffix(StringRef path);
391 } // namespace coff
392 
// Declaration only; the definition is not in this header. Produces a string
// for the given input file -- presumably a human-readable name for use in
// diagnostics (confirm against the definition).
std::string toString(const coff::InputFile *file);
394 } // namespace lld
395 
396 #endif
397