1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/BinaryFormat/Magic.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Support/StringSaver.h"
22 #include <memory>
23 #include <set>
24 #include <vector>
25 
26 namespace llvm {
27 struct DILineInfo;
28 namespace pdb {
29 class DbiModuleDescriptorBuilder;
30 class NativeSession;
31 }
32 namespace lto {
33 class InputFile;
34 }
35 }
36 
37 namespace lld {
38 class DWARFCache;
39 
40 namespace coff {
41 class COFFLinkerContext;
42 
43 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
44 
45 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
46 using llvm::COFF::MachineTypes;
47 using llvm::object::Archive;
48 using llvm::object::COFFObjectFile;
49 using llvm::object::COFFSymbolRef;
50 using llvm::object::coff_import_header;
51 using llvm::object::coff_section;
52 
53 class Chunk;
54 class Defined;
55 class DefinedImportData;
56 class DefinedImportThunk;
57 class DefinedRegular;
58 class SectionChunk;
59 class Symbol;
60 class Undefined;
61 class TpiSource;
62 
63 // The root class of input files.
64 class InputFile {
65 public:
66   enum Kind {
67     ArchiveKind,
68     ObjectKind,
69     LazyObjectKind,
70     PDBKind,
71     ImportKind,
72     BitcodeKind,
73     DLLKind
74   };
kind()75   Kind kind() const { return fileKind; }
~InputFile()76   virtual ~InputFile() {}
77 
78   // Returns the filename.
getName()79   StringRef getName() const { return mb.getBufferIdentifier(); }
80 
81   // Reads a file (the constructor doesn't do that).
82   virtual void parse() = 0;
83 
84   // Returns the CPU type this file was compiled to.
getMachineType()85   virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
86 
87   MemoryBufferRef mb;
88 
89   // An archive file name if this file is created from an archive.
90   StringRef parentName;
91 
92   // Returns .drectve section contents if exist.
getDirectives()93   StringRef getDirectives() { return directives; }
94 
95   COFFLinkerContext &ctx;
96 
97 protected:
98   InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m, bool lazy = false)
mb(m)99       : mb(m), ctx(c), fileKind(k), lazy(lazy) {}
100 
101   StringRef directives;
102 
103 private:
104   const Kind fileKind;
105 
106 public:
107   // True if this is a lazy ObjFile or BitcodeFile.
108   bool lazy = false;
109 };
110 
111 // .lib or .a file.
112 class ArchiveFile : public InputFile {
113 public:
114   explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
classof(const InputFile * f)115   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
116   void parse() override;
117 
118   // Enqueues an archive member load for the given symbol. If we've already
119   // enqueued a load for the same archive member, this function does nothing,
120   // which ensures that we don't load the same member more than once.
121   void addMember(const Archive::Symbol &sym);
122 
123 private:
124   std::unique_ptr<Archive> file;
125   llvm::DenseSet<uint64_t> seen;
126 };
127 
128 // .obj or .o file. This may be a member of an archive file.
129 class ObjFile : public InputFile {
130 public:
131   explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy = false)
InputFile(ctx,ObjectKind,m,lazy)132       : InputFile(ctx, ObjectKind, m, lazy) {}
classof(const InputFile * f)133   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
134   void parse() override;
135   void parseLazy();
136   MachineTypes getMachineType() override;
getChunks()137   ArrayRef<Chunk *> getChunks() { return chunks; }
getDebugChunks()138   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
getSXDataChunks()139   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
getGuardFidChunks()140   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
getGuardIATChunks()141   ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
getGuardLJmpChunks()142   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
getGuardEHContChunks()143   ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
getSymbols()144   ArrayRef<Symbol *> getSymbols() { return symbols; }
145 
getMutableSymbols()146   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
147 
148   ArrayRef<uint8_t> getDebugSection(StringRef secName);
149 
150   // Returns a Symbol object for the symbolIndex'th symbol in the
151   // underlying object file.
getSymbol(uint32_t symbolIndex)152   Symbol *getSymbol(uint32_t symbolIndex) {
153     return symbols[symbolIndex];
154   }
155 
156   // Returns the underlying COFF file.
getCOFFObj()157   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
158 
159   // Add a symbol for a range extension thunk. Return the new symbol table
160   // index. This index can be used to modify a relocation.
addRangeThunkSymbol(Symbol * thunk)161   uint32_t addRangeThunkSymbol(Symbol *thunk) {
162     symbols.push_back(thunk);
163     return symbols.size() - 1;
164   }
165 
166   void includeResourceChunks();
167 
isResourceObjFile()168   bool isResourceObjFile() const { return !resourceChunks.empty(); }
169 
170   // Flags in the absolute @feat.00 symbol if it is present. These usually
171   // indicate if an object was compiled with certain security features enabled
172   // like stack guard, safeseh, /guard:cf, or other things.
173   uint32_t feat00Flags = 0;
174 
175   // True if this object file is compatible with SEH.  COFF-specific and
176   // x86-only. COFF spec 5.10.1. The .sxdata section.
hasSafeSEH()177   bool hasSafeSEH() { return feat00Flags & 0x1; }
178 
179   // True if this file was compiled with /guard:cf.
hasGuardCF()180   bool hasGuardCF() { return feat00Flags & 0x800; }
181 
182   // True if this file was compiled with /guard:ehcont.
hasGuardEHCont()183   bool hasGuardEHCont() { return feat00Flags & 0x4000; }
184 
185   // Pointer to the PDB module descriptor builder. Various debug info records
186   // will reference object files by "module index", which is here. Things like
187   // source files and section contributions are also recorded here. Will be null
188   // if we are not producing a PDB.
189   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
190 
191   const coff_section *addrsigSec = nullptr;
192 
193   const coff_section *callgraphSec = nullptr;
194 
195   // When using Microsoft precompiled headers, this is the PCH's key.
196   // The same key is used by both the precompiled object, and objects using the
197   // precompiled object. Any difference indicates out-of-date objects.
198   std::optional<uint32_t> pchSignature;
199 
200   // Whether this file was compiled with /hotpatch.
201   bool hotPatchable = false;
202 
203   // Whether the object was already merged into the final PDB.
204   bool mergedIntoPDB = false;
205 
206   // If the OBJ has a .debug$T stream, this tells how it will be handled.
207   TpiSource *debugTypesObj = nullptr;
208 
209   // The .debug$P or .debug$T section data if present. Empty otherwise.
210   ArrayRef<uint8_t> debugTypes;
211 
212   std::optional<std::pair<StringRef, uint32_t>>
213   getVariableLocation(StringRef var);
214 
215   std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
216                                                 uint32_t sectionIndex);
217 
218 private:
219   const coff_section* getSection(uint32_t i);
getSection(COFFSymbolRef sym)220   const coff_section *getSection(COFFSymbolRef sym) {
221     return getSection(sym.getSectionNumber());
222   }
223 
224   void enqueuePdbFile(StringRef path, ObjFile *fromFile);
225 
226   void initializeChunks();
227   void initializeSymbols();
228   void initializeFlags();
229   void initializeDependencies();
230 
231   SectionChunk *
232   readSection(uint32_t sectionNumber,
233               const llvm::object::coff_aux_section_definition *def,
234               StringRef leaderName);
235 
236   void readAssociativeDefinition(
237       COFFSymbolRef coffSym,
238       const llvm::object::coff_aux_section_definition *def);
239 
240   void readAssociativeDefinition(
241       COFFSymbolRef coffSym,
242       const llvm::object::coff_aux_section_definition *def,
243       uint32_t parentSection);
244 
245   void recordPrevailingSymbolForMingw(
246       COFFSymbolRef coffSym,
247       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
248 
249   void maybeAssociateSEHForMingw(
250       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
251       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
252 
253   // Given a new symbol Sym with comdat selection Selection, if the new
254   // symbol is not (yet) Prevailing and the existing comdat leader set to
255   // Leader, emits a diagnostic if the new symbol and its selection doesn't
256   // match the existing symbol and its selection. If either old or new
257   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
258   // the existing leader. In that case, Prevailing is set to true.
259   void
260   handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
261                         bool &prevailing, DefinedRegular *leader,
262                         const llvm::object::coff_aux_section_definition *def);
263 
264   std::optional<Symbol *>
265   createDefined(COFFSymbolRef sym,
266                 std::vector<const llvm::object::coff_aux_section_definition *>
267                     &comdatDefs,
268                 bool &prevailingComdat);
269   Symbol *createRegular(COFFSymbolRef sym);
270   Symbol *createUndefined(COFFSymbolRef sym);
271 
272   std::unique_ptr<COFFObjectFile> coffObj;
273 
274   // List of all chunks defined by this file. This includes both section
275   // chunks and non-section chunks for common symbols.
276   std::vector<Chunk *> chunks;
277 
278   std::vector<SectionChunk *> resourceChunks;
279 
280   // CodeView debug info sections.
281   std::vector<SectionChunk *> debugChunks;
282 
283   // Chunks containing symbol table indices of exception handlers. Only used for
284   // 32-bit x86.
285   std::vector<SectionChunk *> sxDataChunks;
286 
287   // Chunks containing symbol table indices of address taken symbols, address
288   // taken IAT entries, longjmp and ehcont targets. These are not linked into
289   // the final binary when /guard:cf is set.
290   std::vector<SectionChunk *> guardFidChunks;
291   std::vector<SectionChunk *> guardIATChunks;
292   std::vector<SectionChunk *> guardLJmpChunks;
293   std::vector<SectionChunk *> guardEHContChunks;
294 
295   // This vector contains a list of all symbols defined or referenced by this
296   // file. They are indexed such that you can get a Symbol by symbol
297   // index. Nonexistent indices (which are occupied by auxiliary
298   // symbols in the real symbol table) are filled with null pointers.
299   std::vector<Symbol *> symbols;
300 
301   // This vector contains the same chunks as Chunks, but they are
302   // indexed such that you can get a SectionChunk by section index.
303   // Nonexistent section indices are filled with null pointers.
304   // (Because section number is 1-based, the first slot is always a
305   // null pointer.) This vector is only valid during initialization.
306   std::vector<SectionChunk *> sparseChunks;
307 
308   DWARFCache *dwarf = nullptr;
309 };
310 
311 // This is a PDB type server dependency, that is not a input file per se, but
312 // needs to be treated like one. Such files are discovered from the debug type
313 // stream.
314 class PDBInputFile : public InputFile {
315 public:
316   explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
317   ~PDBInputFile();
classof(const InputFile * f)318   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
319   void parse() override;
320 
321   static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
322                                           StringRef path, ObjFile *fromFile);
323 
324   // Record possible errors while opening the PDB file
325   std::optional<std::string> loadErrorStr;
326 
327   // This is the actual interface to the PDB (if it was opened successfully)
328   std::unique_ptr<llvm::pdb::NativeSession> session;
329 
330   // If the PDB has a .debug$T stream, this tells how it will be handled.
331   TpiSource *debugTypesObj = nullptr;
332 };
333 
334 // This type represents import library members that contain DLL names
335 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
336 // for details about the format.
337 class ImportFile : public InputFile {
338 public:
339   explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m);
340 
classof(const InputFile * f)341   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
342 
343   Symbol *impSym = nullptr;
344   Symbol *thunkSym = nullptr;
345   std::string dllName;
346 
347 private:
348   void parse() override;
349 
350 public:
351   StringRef externalName;
352   const coff_import_header *hdr;
353   Chunk *location = nullptr;
354 
355   // We want to eliminate dllimported symbols if no one actually refers to them.
356   // These "Live" bits are used to keep track of which import library members
357   // are actually in use.
358   //
359   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
360   // symbols provided by this import library member. We also track whether the
361   // imported symbol is used separately from whether the thunk is used in order
362   // to avoid creating unnecessary thunks.
363   bool live;
364   bool thunkLive;
365 };
366 
367 // Used for LTO.
368 class BitcodeFile : public InputFile {
369 public:
370   explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
371                        StringRef archiveName, uint64_t offsetInArchive,
372                        bool lazy);
373   ~BitcodeFile();
classof(const InputFile * f)374   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
getSymbols()375   ArrayRef<Symbol *> getSymbols() { return symbols; }
376   MachineTypes getMachineType() override;
377   void parseLazy();
378   std::unique_ptr<llvm::lto::InputFile> obj;
379 
380 private:
381   void parse() override;
382 
383   std::vector<Symbol *> symbols;
384 };
385 
386 // .dll file. MinGW only.
387 class DLLFile : public InputFile {
388 public:
DLLFile(COFFLinkerContext & ctx,MemoryBufferRef m)389   explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m)
390       : InputFile(ctx, DLLKind, m) {}
classof(const InputFile * f)391   static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
392   void parse() override;
393   MachineTypes getMachineType() override;
394 
395   struct Symbol {
396     StringRef dllName;
397     StringRef symbolName;
398     llvm::COFF::ImportNameType nameType;
399     llvm::COFF::ImportType importType;
400   };
401 
402   void makeImport(Symbol *s);
403 
404 private:
405   std::unique_ptr<COFFObjectFile> coffObj;
406   llvm::StringSet<> seen;
407 };
408 
isBitcode(MemoryBufferRef mb)409 inline bool isBitcode(MemoryBufferRef mb) {
410   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
411 }
412 
413 std::string replaceThinLTOSuffix(StringRef path, StringRef suffix,
414                                  StringRef repl);
415 } // namespace coff
416 
417 std::string toString(const coff::InputFile *file);
418 } // namespace lld
419 
420 #endif
421