1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
25 
26 namespace llvm {
27 struct DILineInfo;
28 namespace pdb {
29 class DbiModuleDescriptorBuilder;
30 class NativeSession;
31 }
32 namespace lto {
33 class InputFile;
34 }
35 }
36 
37 namespace lld {
38 class DWARFCache;
39 
40 namespace coff {
41 
// Returns memory buffers for the members of the archive `file`.
// NOTE(review): the definition is not in this header; presumably there is one
// entry per archive member -- confirm against the implementation.
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
43 
44 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
45 using llvm::COFF::MachineTypes;
46 using llvm::object::Archive;
47 using llvm::object::COFFObjectFile;
48 using llvm::object::COFFSymbolRef;
49 using llvm::object::coff_import_header;
50 using llvm::object::coff_section;
51 
52 class Chunk;
53 class Defined;
54 class DefinedImportData;
55 class DefinedImportThunk;
56 class DefinedRegular;
57 class SectionChunk;
58 class Symbol;
59 class Undefined;
60 class TpiSource;
61 
62 // The root class of input files.
63 class InputFile {
64 public:
65   enum Kind {
66     ArchiveKind,
67     ObjectKind,
68     LazyObjectKind,
69     PDBKind,
70     ImportKind,
71     BitcodeKind,
72     DLLKind
73   };
kind()74   Kind kind() const { return fileKind; }
~InputFile()75   virtual ~InputFile() {}
76 
77   // Returns the filename.
getName()78   StringRef getName() const { return mb.getBufferIdentifier(); }
79 
80   // Reads a file (the constructor doesn't do that).
81   virtual void parse() = 0;
82 
83   // Returns the CPU type this file was compiled to.
getMachineType()84   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
85 
86   MemoryBufferRef mb;
87 
88   // An archive file name if this file is created from an archive.
89   StringRef parentName;
90 
91   // Returns .drectve section contents if exist.
getDirectives()92   StringRef getDirectives() { return directives; }
93 
94 protected:
InputFile(Kind k,MemoryBufferRef m)95   InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
96 
97   StringRef directives;
98 
99 private:
100   const Kind fileKind;
101 };
102 
103 // .lib or .a file.
104 class ArchiveFile : public InputFile {
105 public:
106   explicit ArchiveFile(MemoryBufferRef m);
classof(const InputFile * f)107   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
108   void parse() override;
109 
110   // Enqueues an archive member load for the given symbol. If we've already
111   // enqueued a load for the same archive member, this function does nothing,
112   // which ensures that we don't load the same member more than once.
113   void addMember(const Archive::Symbol &sym);
114 
115 private:
116   std::unique_ptr<Archive> file;
117   llvm::DenseSet<uint64_t> seen;
118 };
119 
120 // .obj or .o file between -start-lib and -end-lib.
121 class LazyObjFile : public InputFile {
122 public:
LazyObjFile(MemoryBufferRef m)123   explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
classof(const InputFile * f)124   static bool classof(const InputFile *f) {
125     return f->kind() == LazyObjectKind;
126   }
127   // Makes this object file part of the link.
128   void fetch();
129   // Adds the symbols in this file to the symbol table as LazyObject symbols.
130   void parse() override;
131 
132 private:
133   std::vector<Symbol *> symbols;
134 };
135 
136 // .obj or .o file. This may be a member of an archive file.
137 class ObjFile : public InputFile {
138 public:
ObjFile(MemoryBufferRef m)139   explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
ObjFile(MemoryBufferRef m,std::vector<Symbol * > && symbols)140   explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
141       : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
classof(const InputFile * f)142   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
143   void parse() override;
144   MachineTypes getMachineType() override;
getChunks()145   ArrayRef<Chunk *> getChunks() { return chunks; }
getDebugChunks()146   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
getSXDataChunks()147   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
getGuardFidChunks()148   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
getGuardIATChunks()149   ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
getGuardLJmpChunks()150   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
getGuardEHContChunks()151   ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
getSymbols()152   ArrayRef<Symbol *> getSymbols() { return symbols; }
153 
getMutableSymbols()154   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
155 
156   ArrayRef<uint8_t> getDebugSection(StringRef secName);
157 
158   // Returns a Symbol object for the symbolIndex'th symbol in the
159   // underlying object file.
getSymbol(uint32_t symbolIndex)160   Symbol *getSymbol(uint32_t symbolIndex) {
161     return symbols[symbolIndex];
162   }
163 
164   // Returns the underlying COFF file.
getCOFFObj()165   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
166 
167   // Add a symbol for a range extension thunk. Return the new symbol table
168   // index. This index can be used to modify a relocation.
addRangeThunkSymbol(Symbol * thunk)169   uint32_t addRangeThunkSymbol(Symbol *thunk) {
170     symbols.push_back(thunk);
171     return symbols.size() - 1;
172   }
173 
174   void includeResourceChunks();
175 
isResourceObjFile()176   bool isResourceObjFile() const { return !resourceChunks.empty(); }
177 
178   static std::vector<ObjFile *> instances;
179 
180   // Flags in the absolute @feat.00 symbol if it is present. These usually
181   // indicate if an object was compiled with certain security features enabled
182   // like stack guard, safeseh, /guard:cf, or other things.
183   uint32_t feat00Flags = 0;
184 
185   // True if this object file is compatible with SEH.  COFF-specific and
186   // x86-only. COFF spec 5.10.1. The .sxdata section.
hasSafeSEH()187   bool hasSafeSEH() { return feat00Flags & 0x1; }
188 
189   // True if this file was compiled with /guard:cf.
hasGuardCF()190   bool hasGuardCF() { return feat00Flags & 0x4800; }
191 
192   // Pointer to the PDB module descriptor builder. Various debug info records
193   // will reference object files by "module index", which is here. Things like
194   // source files and section contributions are also recorded here. Will be null
195   // if we are not producing a PDB.
196   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
197 
198   const coff_section *addrsigSec = nullptr;
199 
200   const coff_section *callgraphSec = nullptr;
201 
202   // When using Microsoft precompiled headers, this is the PCH's key.
203   // The same key is used by both the precompiled object, and objects using the
204   // precompiled object. Any difference indicates out-of-date objects.
205   llvm::Optional<uint32_t> pchSignature;
206 
207   // Whether this file was compiled with /hotpatch.
208   bool hotPatchable = false;
209 
210   // Whether the object was already merged into the final PDB.
211   bool mergedIntoPDB = false;
212 
213   // If the OBJ has a .debug$T stream, this tells how it will be handled.
214   TpiSource *debugTypesObj = nullptr;
215 
216   // The .debug$P or .debug$T section data if present. Empty otherwise.
217   ArrayRef<uint8_t> debugTypes;
218 
219   llvm::Optional<std::pair<StringRef, uint32_t>>
220   getVariableLocation(StringRef var);
221 
222   llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
223                                                  uint32_t sectionIndex);
224 
225 private:
226   const coff_section* getSection(uint32_t i);
getSection(COFFSymbolRef sym)227   const coff_section *getSection(COFFSymbolRef sym) {
228     return getSection(sym.getSectionNumber());
229   }
230 
231   void initializeChunks();
232   void initializeSymbols();
233   void initializeFlags();
234   void initializeDependencies();
235 
236   SectionChunk *
237   readSection(uint32_t sectionNumber,
238               const llvm::object::coff_aux_section_definition *def,
239               StringRef leaderName);
240 
241   void readAssociativeDefinition(
242       COFFSymbolRef coffSym,
243       const llvm::object::coff_aux_section_definition *def);
244 
245   void readAssociativeDefinition(
246       COFFSymbolRef coffSym,
247       const llvm::object::coff_aux_section_definition *def,
248       uint32_t parentSection);
249 
250   void recordPrevailingSymbolForMingw(
251       COFFSymbolRef coffSym,
252       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
253 
254   void maybeAssociateSEHForMingw(
255       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
256       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
257 
258   // Given a new symbol Sym with comdat selection Selection, if the new
259   // symbol is not (yet) Prevailing and the existing comdat leader set to
260   // Leader, emits a diagnostic if the new symbol and its selection doesn't
261   // match the existing symbol and its selection. If either old or new
262   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
263   // the existing leader. In that case, Prevailing is set to true.
264   void
265   handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
266                         bool &prevailing, DefinedRegular *leader,
267                         const llvm::object::coff_aux_section_definition *def);
268 
269   llvm::Optional<Symbol *>
270   createDefined(COFFSymbolRef sym,
271                 std::vector<const llvm::object::coff_aux_section_definition *>
272                     &comdatDefs,
273                 bool &prevailingComdat);
274   Symbol *createRegular(COFFSymbolRef sym);
275   Symbol *createUndefined(COFFSymbolRef sym);
276 
277   std::unique_ptr<COFFObjectFile> coffObj;
278 
279   // List of all chunks defined by this file. This includes both section
280   // chunks and non-section chunks for common symbols.
281   std::vector<Chunk *> chunks;
282 
283   std::vector<SectionChunk *> resourceChunks;
284 
285   // CodeView debug info sections.
286   std::vector<SectionChunk *> debugChunks;
287 
288   // Chunks containing symbol table indices of exception handlers. Only used for
289   // 32-bit x86.
290   std::vector<SectionChunk *> sxDataChunks;
291 
292   // Chunks containing symbol table indices of address taken symbols, address
293   // taken IAT entries, longjmp and ehcont targets. These are not linked into
294   // the final binary when /guard:cf is set.
295   std::vector<SectionChunk *> guardFidChunks;
296   std::vector<SectionChunk *> guardIATChunks;
297   std::vector<SectionChunk *> guardLJmpChunks;
298   std::vector<SectionChunk *> guardEHContChunks;
299 
300   // This vector contains a list of all symbols defined or referenced by this
301   // file. They are indexed such that you can get a Symbol by symbol
302   // index. Nonexistent indices (which are occupied by auxiliary
303   // symbols in the real symbol table) are filled with null pointers.
304   std::vector<Symbol *> symbols;
305 
306   // This vector contains the same chunks as Chunks, but they are
307   // indexed such that you can get a SectionChunk by section index.
308   // Nonexistent section indices are filled with null pointers.
309   // (Because section number is 1-based, the first slot is always a
310   // null pointer.) This vector is only valid during initialization.
311   std::vector<SectionChunk *> sparseChunks;
312 
313   DWARFCache *dwarf = nullptr;
314 };
315 
316 // This is a PDB type server dependency, that is not a input file per se, but
317 // needs to be treated like one. Such files are discovered from the debug type
318 // stream.
319 class PDBInputFile : public InputFile {
320 public:
321   explicit PDBInputFile(MemoryBufferRef m);
322   ~PDBInputFile();
classof(const InputFile * f)323   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
324   void parse() override;
325 
326   static void enqueue(StringRef path, ObjFile *fromFile);
327 
328   static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
329 
330   static std::map<std::string, PDBInputFile *> instances;
331 
332   // Record possible errors while opening the PDB file
333   llvm::Optional<Error> loadErr;
334 
335   // This is the actual interface to the PDB (if it was opened successfully)
336   std::unique_ptr<llvm::pdb::NativeSession> session;
337 
338   // If the PDB has a .debug$T stream, this tells how it will be handled.
339   TpiSource *debugTypesObj = nullptr;
340 };
341 
342 // This type represents import library members that contain DLL names
343 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
344 // for details about the format.
345 class ImportFile : public InputFile {
346 public:
ImportFile(MemoryBufferRef m)347   explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
348 
classof(const InputFile * f)349   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
350 
351   static std::vector<ImportFile *> instances;
352 
353   Symbol *impSym = nullptr;
354   Symbol *thunkSym = nullptr;
355   std::string dllName;
356 
357 private:
358   void parse() override;
359 
360 public:
361   StringRef externalName;
362   const coff_import_header *hdr;
363   Chunk *location = nullptr;
364 
365   // We want to eliminate dllimported symbols if no one actually refers to them.
366   // These "Live" bits are used to keep track of which import library members
367   // are actually in use.
368   //
369   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
370   // symbols provided by this import library member. We also track whether the
371   // imported symbol is used separately from whether the thunk is used in order
372   // to avoid creating unnecessary thunks.
373   bool live = !config->doGC;
374   bool thunkLive = !config->doGC;
375 };
376 
377 // Used for LTO.
378 class BitcodeFile : public InputFile {
379 public:
380   BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
381               uint64_t offsetInArchive);
382   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
383                        uint64_t offsetInArchive,
384                        std::vector<Symbol *> &&symbols);
385   ~BitcodeFile();
classof(const InputFile * f)386   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
getSymbols()387   ArrayRef<Symbol *> getSymbols() { return symbols; }
388   MachineTypes getMachineType() override;
389   static std::vector<BitcodeFile *> instances;
390   std::unique_ptr<llvm::lto::InputFile> obj;
391 
392 private:
393   void parse() override;
394 
395   std::vector<Symbol *> symbols;
396 };
397 
398 // .dll file. MinGW only.
399 class DLLFile : public InputFile {
400 public:
DLLFile(MemoryBufferRef m)401   explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {}
classof(const InputFile * f)402   static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
403   void parse() override;
404   MachineTypes getMachineType() override;
405 
406   struct Symbol {
407     StringRef dllName;
408     StringRef symbolName;
409     llvm::COFF::ImportNameType nameType;
410     llvm::COFF::ImportType importType;
411   };
412 
413   void makeImport(Symbol *s);
414 
415 private:
416   std::unique_ptr<COFFObjectFile> coffObj;
417   llvm::StringSet<> seen;
418 };
419 
isBitcode(MemoryBufferRef mb)420 inline bool isBitcode(MemoryBufferRef mb) {
421   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
422 }
423 
// Returns a copy of `path`, possibly with a different suffix.
// NOTE(review): the definition is not in this header; presumably the suffix
// replacement is driven by a ThinLTO option in the configuration -- confirm.
std::string replaceThinLTOSuffix(StringRef path);
425 } // namespace coff
426 
// Returns a string describing `file`.
// NOTE(review): defined elsewhere; presumably a human-readable name used in
// diagnostics -- confirm the exact format against the implementation.
std::string toString(const coff::InputFile *file);
428 } // namespace lld
429 
430 #endif
431