1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_SYMBOLS_H
10 #define LLD_COFF_SYMBOLS_H
11 
12 #include "Chunks.h"
13 #include "Config.h"
14 #include "lld/Common/LLVM.h"
15 #include "lld/Common/Memory.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/Object/Archive.h"
18 #include "llvm/Object/COFF.h"
19 #include <atomic>
20 #include <memory>
21 #include <vector>
22 
23 namespace lld {
24 
25 std::string toString(coff::Symbol &b);
26 
27 // There are two different ways to convert an Archive::Symbol to a string:
28 // One for Microsoft name mangling and one for Itanium name mangling.
29 // Call the functions toCOFFString and toELFString, not just toString.
30 std::string toCOFFString(const coff::Archive::Symbol &b);
31 
32 namespace coff {
33 
34 using llvm::object::Archive;
35 using llvm::object::COFFSymbolRef;
36 using llvm::object::coff_import_header;
37 using llvm::object::coff_symbol_generic;
38 
39 class ArchiveFile;
40 class InputFile;
41 class ObjFile;
42 class SymbolTable;
43 
44 // The base class for real symbol classes.
45 class Symbol {
46 public:
47   enum Kind {
48     // The order of these is significant. We start with the regular defined
49     // symbols as those are the most prevalent and the zero tag is the cheapest
50     // to set. Among the defined kinds, the lower the kind is preferred over
51     // the higher kind when testing whether one symbol should take precedence
52     // over another.
53     DefinedRegularKind = 0,
54     DefinedCommonKind,
55     DefinedLocalImportKind,
56     DefinedImportThunkKind,
57     DefinedImportDataKind,
58     DefinedAbsoluteKind,
59     DefinedSyntheticKind,
60 
61     UndefinedKind,
62     LazyArchiveKind,
63     LazyObjectKind,
64 
65     LastDefinedCOFFKind = DefinedCommonKind,
66     LastDefinedKind = DefinedSyntheticKind,
67   };
68 
69   Kind kind() const { return static_cast<Kind>(symbolKind); }
70 
71   // Returns the symbol name.
72   StringRef getName() {
73     // COFF symbol names are read lazily for a performance reason.
74     // Non-external symbol names are never used by the linker except for logging
75     // or debugging. Their internal references are resolved not by name but by
76     // symbol index. And because they are not external, no one can refer them by
77     // name. Object files contain lots of non-external symbols, and creating
78     // StringRefs for them (which involves lots of strlen() on the string table)
79     // is a waste of time.
80     if (nameData == nullptr)
81       computeName();
82     return StringRef(nameData, nameSize);
83   }
84 
85   void replaceKeepingName(Symbol *other, size_t size);
86 
87   // Returns the file from which this symbol was created.
88   InputFile *getFile();
89 
90   // Indicates that this symbol will be included in the final image. Only valid
91   // after calling markLive.
92   bool isLive() const;
93 
94   bool isLazy() const {
95     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
96   }
97 
98 private:
99   void computeName();
100 
101 protected:
102   friend SymbolTable;
103   explicit Symbol(Kind k, StringRef n = "")
104       : symbolKind(k), isExternal(true), isCOMDAT(false),
105         writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false),
106         isRuntimePseudoReloc(false), nameSize(n.size()),
107         nameData(n.empty() ? nullptr : n.data()) {}
108 
109   const unsigned symbolKind : 8;
110   unsigned isExternal : 1;
111 
112 public:
113   // This bit is used by the \c DefinedRegular subclass.
114   unsigned isCOMDAT : 1;
115 
116   // This bit is used by Writer::createSymbolAndStringTable() to prevent
117   // symbols from being written to the symbol table more than once.
118   unsigned writtenToSymtab : 1;
119 
120   // True if this symbol was referenced by a regular (non-bitcode) object.
121   unsigned isUsedInRegularObj : 1;
122 
123   // True if we've seen both a lazy and an undefined symbol with this symbol
124   // name, which means that we have enqueued an archive member load and should
125   // not load any more archive members to resolve the same symbol.
126   unsigned pendingArchiveLoad : 1;
127 
128   /// True if we've already added this symbol to the list of GC roots.
129   unsigned isGCRoot : 1;
130 
131   unsigned isRuntimePseudoReloc : 1;
132 
133 protected:
134   // Symbol name length. Assume symbol lengths fit in a 32-bit integer.
135   uint32_t nameSize;
136 
137   const char *nameData;
138 };
139 
140 // The base class for any defined symbols, including absolute symbols,
141 // etc.
142 class Defined : public Symbol {
143 public:
144   Defined(Kind k, StringRef n) : Symbol(k, n) {}
145 
146   static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; }
147 
148   // Returns the RVA (relative virtual address) of this symbol. The
149   // writer sets and uses RVAs.
150   uint64_t getRVA();
151 
152   // Returns the chunk containing this symbol. Absolute symbols and __ImageBase
153   // do not have chunks, so this may return null.
154   Chunk *getChunk();
155 };
156 
157 // Symbols defined via a COFF object file or bitcode file.  For COFF files, this
158 // stores a coff_symbol_generic*, and names of internal symbols are lazily
159 // loaded through that. For bitcode files, Sym is nullptr and the name is stored
160 // as a decomposed StringRef.
161 class DefinedCOFF : public Defined {
162   friend Symbol;
163 
164 public:
165   DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s)
166       : Defined(k, n), file(f), sym(s) {}
167 
168   static bool classof(const Symbol *s) {
169     return s->kind() <= LastDefinedCOFFKind;
170   }
171 
172   InputFile *getFile() { return file; }
173 
174   COFFSymbolRef getCOFFSymbol();
175 
176   InputFile *file;
177 
178 protected:
179   const coff_symbol_generic *sym;
180 };
181 
182 // Regular defined symbols read from object file symbol tables.
183 class DefinedRegular : public DefinedCOFF {
184 public:
185   DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT,
186                  bool isExternal = false,
187                  const coff_symbol_generic *s = nullptr,
188                  SectionChunk *c = nullptr)
189       : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) {
190     this->isExternal = isExternal;
191     this->isCOMDAT = isCOMDAT;
192   }
193 
194   static bool classof(const Symbol *s) {
195     return s->kind() == DefinedRegularKind;
196   }
197 
198   uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; }
199   SectionChunk *getChunk() const { return *data; }
200   uint32_t getValue() const { return sym->Value; }
201 
202   SectionChunk **data;
203 };
204 
205 class DefinedCommon : public DefinedCOFF {
206 public:
207   DefinedCommon(InputFile *f, StringRef n, uint64_t size,
208                 const coff_symbol_generic *s = nullptr,
209                 CommonChunk *c = nullptr)
210       : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {
211     this->isExternal = true;
212   }
213 
214   static bool classof(const Symbol *s) {
215     return s->kind() == DefinedCommonKind;
216   }
217 
218   uint64_t getRVA() { return data->getRVA(); }
219   CommonChunk *getChunk() { return data; }
220 
221 private:
222   friend SymbolTable;
223   uint64_t getSize() const { return size; }
224   CommonChunk *data;
225   uint64_t size;
226 };
227 
228 // Absolute symbols.
229 class DefinedAbsolute : public Defined {
230 public:
231   DefinedAbsolute(StringRef n, COFFSymbolRef s)
232       : Defined(DefinedAbsoluteKind, n), va(s.getValue()) {
233     isExternal = s.isExternal();
234   }
235 
236   DefinedAbsolute(StringRef n, uint64_t v)
237       : Defined(DefinedAbsoluteKind, n), va(v) {}
238 
239   static bool classof(const Symbol *s) {
240     return s->kind() == DefinedAbsoluteKind;
241   }
242 
243   uint64_t getRVA() { return va - config->imageBase; }
244   void setVA(uint64_t v) { va = v; }
245   uint64_t getVA() const { return va; }
246 
247   // Section index relocations against absolute symbols resolve to
248   // this 16 bit number, and it is the largest valid section index
249   // plus one. This variable keeps it.
250   static uint16_t numOutputSections;
251 
252 private:
253   uint64_t va;
254 };
255 
256 // This symbol is used for linker-synthesized symbols like __ImageBase and
257 // __safe_se_handler_table.
258 class DefinedSynthetic : public Defined {
259 public:
260   explicit DefinedSynthetic(StringRef name, Chunk *c)
261       : Defined(DefinedSyntheticKind, name), c(c) {}
262 
263   static bool classof(const Symbol *s) {
264     return s->kind() == DefinedSyntheticKind;
265   }
266 
267   // A null chunk indicates that this is __ImageBase. Otherwise, this is some
268   // other synthesized chunk, like SEHTableChunk.
269   uint32_t getRVA() { return c ? c->getRVA() : 0; }
270   Chunk *getChunk() { return c; }
271 
272 private:
273   Chunk *c;
274 };
275 
276 // This class represents a symbol defined in an archive file. It is
277 // created from an archive file header, and it knows how to load an
278 // object file from an archive to replace itself with a defined
279 // symbol. If the resolver finds both Undefined and LazyArchive for
280 // the same name, it will ask the LazyArchive to load a file.
281 class LazyArchive : public Symbol {
282 public:
283   LazyArchive(ArchiveFile *f, const Archive::Symbol s)
284       : Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {}
285 
286   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
287 
288   MemoryBufferRef getMemberBuffer();
289 
290   ArchiveFile *file;
291   const Archive::Symbol sym;
292 };
293 
294 class LazyObject : public Symbol {
295 public:
296   LazyObject(LazyObjFile *f, StringRef n)
297       : Symbol(LazyObjectKind, n), file(f) {}
298   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
299   LazyObjFile *file;
300 };
301 
302 // Undefined symbols.
303 class Undefined : public Symbol {
304 public:
305   explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {}
306 
307   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
308 
309   // An undefined symbol can have a fallback symbol which gives an
310   // undefined symbol a second chance if it would remain undefined.
311   // If it remains undefined, it'll be replaced with whatever the
312   // Alias pointer points to.
313   Symbol *weakAlias = nullptr;
314 
315   // If this symbol is external weak, try to resolve it to a defined
316   // symbol by searching the chain of fallback symbols. Returns the symbol if
317   // successful, otherwise returns null.
318   Defined *getWeakAlias();
319 };
320 
321 // Windows-specific classes.
322 
323 // This class represents a symbol imported from a DLL. This has two
324 // names for internal use and external use. The former is used for
325 // name resolution, and the latter is used for the import descriptor
326 // table in an output. The former has "__imp_" prefix.
327 class DefinedImportData : public Defined {
328 public:
329   DefinedImportData(StringRef n, ImportFile *f)
330       : Defined(DefinedImportDataKind, n), file(f) {
331   }
332 
333   static bool classof(const Symbol *s) {
334     return s->kind() == DefinedImportDataKind;
335   }
336 
337   uint64_t getRVA() { return file->location->getRVA(); }
338   Chunk *getChunk() { return file->location; }
339   void setLocation(Chunk *addressTable) { file->location = addressTable; }
340 
341   StringRef getDLLName() { return file->dllName; }
342   StringRef getExternalName() { return file->externalName; }
343   uint16_t getOrdinal() { return file->hdr->OrdinalHint; }
344 
345   ImportFile *file;
346 };
347 
348 // This class represents a symbol for a jump table entry which jumps
349 // to a function in a DLL. Linker are supposed to create such symbols
350 // without "__imp_" prefix for all function symbols exported from
351 // DLLs, so that you can call DLL functions as regular functions with
352 // a regular name. A function pointer is given as a DefinedImportData.
353 class DefinedImportThunk : public Defined {
354 public:
355   DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine);
356 
357   static bool classof(const Symbol *s) {
358     return s->kind() == DefinedImportThunkKind;
359   }
360 
361   uint64_t getRVA() { return data->getRVA(); }
362   Chunk *getChunk() { return data; }
363 
364   DefinedImportData *wrappedSym;
365 
366 private:
367   Chunk *data;
368 };
369 
370 // If you have a symbol "foo" in your object file, a symbol name
371 // "__imp_foo" becomes automatically available as a pointer to "foo".
372 // This class is for such automatically-created symbols.
373 // Yes, this is an odd feature. We didn't intend to implement that.
374 // This is here just for compatibility with MSVC.
375 class DefinedLocalImport : public Defined {
376 public:
377   DefinedLocalImport(StringRef n, Defined *s)
378       : Defined(DefinedLocalImportKind, n), data(make<LocalImportChunk>(s)) {}
379 
380   static bool classof(const Symbol *s) {
381     return s->kind() == DefinedLocalImportKind;
382   }
383 
384   uint64_t getRVA() { return data->getRVA(); }
385   Chunk *getChunk() { return data; }
386 
387 private:
388   LocalImportChunk *data;
389 };
390 
391 inline uint64_t Defined::getRVA() {
392   switch (kind()) {
393   case DefinedAbsoluteKind:
394     return cast<DefinedAbsolute>(this)->getRVA();
395   case DefinedSyntheticKind:
396     return cast<DefinedSynthetic>(this)->getRVA();
397   case DefinedImportDataKind:
398     return cast<DefinedImportData>(this)->getRVA();
399   case DefinedImportThunkKind:
400     return cast<DefinedImportThunk>(this)->getRVA();
401   case DefinedLocalImportKind:
402     return cast<DefinedLocalImport>(this)->getRVA();
403   case DefinedCommonKind:
404     return cast<DefinedCommon>(this)->getRVA();
405   case DefinedRegularKind:
406     return cast<DefinedRegular>(this)->getRVA();
407   case LazyArchiveKind:
408   case LazyObjectKind:
409   case UndefinedKind:
410     llvm_unreachable("Cannot get the address for an undefined symbol.");
411   }
412   llvm_unreachable("unknown symbol kind");
413 }
414 
415 inline Chunk *Defined::getChunk() {
416   switch (kind()) {
417   case DefinedRegularKind:
418     return cast<DefinedRegular>(this)->getChunk();
419   case DefinedAbsoluteKind:
420     return nullptr;
421   case DefinedSyntheticKind:
422     return cast<DefinedSynthetic>(this)->getChunk();
423   case DefinedImportDataKind:
424     return cast<DefinedImportData>(this)->getChunk();
425   case DefinedImportThunkKind:
426     return cast<DefinedImportThunk>(this)->getChunk();
427   case DefinedLocalImportKind:
428     return cast<DefinedLocalImport>(this)->getChunk();
429   case DefinedCommonKind:
430     return cast<DefinedCommon>(this)->getChunk();
431   case LazyArchiveKind:
432   case LazyObjectKind:
433   case UndefinedKind:
434     llvm_unreachable("Cannot get the chunk of an undefined symbol.");
435   }
436   llvm_unreachable("unknown symbol kind");
437 }
438 
439 // A buffer class that is large enough to hold any Symbol-derived
440 // object. We allocate memory using this class and instantiate a symbol
441 // using the placement new.
442 union SymbolUnion {
443   alignas(DefinedRegular) char a[sizeof(DefinedRegular)];
444   alignas(DefinedCommon) char b[sizeof(DefinedCommon)];
445   alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)];
446   alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)];
447   alignas(LazyArchive) char e[sizeof(LazyArchive)];
448   alignas(Undefined) char f[sizeof(Undefined)];
449   alignas(DefinedImportData) char g[sizeof(DefinedImportData)];
450   alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];
451   alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];
452   alignas(LazyObject) char j[sizeof(LazyObject)];
453 };
454 
455 template <typename T, typename... ArgT>
456 void replaceSymbol(Symbol *s, ArgT &&... arg) {
457   static_assert(std::is_trivially_destructible<T>(),
458                 "Symbol types must be trivially destructible");
459   static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small");
460   static_assert(alignof(T) <= alignof(SymbolUnion),
461                 "SymbolUnion not aligned enough");
462   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
463          "Not a Symbol");
464   new (s) T(std::forward<ArgT>(arg)...);
465 }
466 } // namespace coff
467 
468 } // namespace lld
469 
470 #endif
471