1ece8a530Spatrick //===- InputFiles.h ---------------------------------------------*- C++ -*-===// 2ece8a530Spatrick // 3ece8a530Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ece8a530Spatrick // See https://llvm.org/LICENSE.txt for license information. 5ece8a530Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ece8a530Spatrick // 7ece8a530Spatrick //===----------------------------------------------------------------------===// 8ece8a530Spatrick 9ece8a530Spatrick #ifndef LLD_ELF_INPUT_FILES_H 10ece8a530Spatrick #define LLD_ELF_INPUT_FILES_H 11ece8a530Spatrick 12ece8a530Spatrick #include "Config.h" 13*05edf1c1Srobert #include "Symbols.h" 14ece8a530Spatrick #include "lld/Common/ErrorHandler.h" 15ece8a530Spatrick #include "lld/Common/LLVM.h" 16ece8a530Spatrick #include "lld/Common/Reproduce.h" 17ece8a530Spatrick #include "llvm/ADT/DenseSet.h" 18*05edf1c1Srobert #include "llvm/BinaryFormat/Magic.h" 19ece8a530Spatrick #include "llvm/Object/ELF.h" 20*05edf1c1Srobert #include "llvm/Support/MemoryBufferRef.h" 21ece8a530Spatrick #include "llvm/Support/Threading.h" 22ece8a530Spatrick 23ece8a530Spatrick namespace llvm { 24ece8a530Spatrick struct DILineInfo; 25ece8a530Spatrick class TarWriter; 26ece8a530Spatrick namespace lto { 27ece8a530Spatrick class InputFile; 28ece8a530Spatrick } 29ece8a530Spatrick } // namespace llvm 30ece8a530Spatrick 31ece8a530Spatrick namespace lld { 32ece8a530Spatrick class DWARFCache; 33ece8a530Spatrick 34ece8a530Spatrick // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 35ece8a530Spatrick std::string toString(const elf::InputFile *f); 36ece8a530Spatrick 377c5ea754Srobert void parseGNUWarning(StringRef name, ArrayRef<char> data, size_t size); 387c5ea754Srobert 39ece8a530Spatrick namespace elf { 40ece8a530Spatrick 41*05edf1c1Srobert class InputSection; 42ece8a530Spatrick class Symbol; 43ece8a530Spatrick 44*05edf1c1Srobert // If --reproduce is specified, all input files are written to this tar archive. 45ece8a530Spatrick extern std::unique_ptr<llvm::TarWriter> tar; 46ece8a530Spatrick 47ece8a530Spatrick // Opens a given file. 48*05edf1c1Srobert std::optional<MemoryBufferRef> readFile(StringRef path); 49ece8a530Spatrick 50ece8a530Spatrick // Add symbols in File to the symbol table. 51ece8a530Spatrick void parseFile(InputFile *file); 52ece8a530Spatrick 53ece8a530Spatrick // The root class of input files. 54ece8a530Spatrick class InputFile { 55*05edf1c1Srobert protected: 56*05edf1c1Srobert std::unique_ptr<Symbol *[]> symbols; 57*05edf1c1Srobert uint32_t numSymbols = 0; 58*05edf1c1Srobert SmallVector<InputSectionBase *, 0> sections; 59*05edf1c1Srobert 60ece8a530Spatrick public: 61*05edf1c1Srobert enum Kind : uint8_t { 62ece8a530Spatrick ObjKind, 63ece8a530Spatrick SharedKind, 64ece8a530Spatrick ArchiveKind, 65ece8a530Spatrick BitcodeKind, 66ece8a530Spatrick BinaryKind, 67ece8a530Spatrick }; 68ece8a530Spatrick kind()69ece8a530Spatrick Kind kind() const { return fileKind; } 70ece8a530Spatrick isElf()71ece8a530Spatrick bool isElf() const { 72ece8a530Spatrick Kind k = kind(); 73ece8a530Spatrick return k == ObjKind || k == SharedKind; 74ece8a530Spatrick } 75ece8a530Spatrick getName()76ece8a530Spatrick StringRef getName() const { return mb.getBufferIdentifier(); } 77ece8a530Spatrick MemoryBufferRef mb; 78ece8a530Spatrick 79ece8a530Spatrick // Returns sections. It is a runtime error to call this function 80ece8a530Spatrick // on files that don't have the notion of sections. getSections()81ece8a530Spatrick ArrayRef<InputSectionBase *> getSections() const { 82ece8a530Spatrick assert(fileKind == ObjKind || fileKind == BinaryKind); 83ece8a530Spatrick return sections; 84ece8a530Spatrick } 85ece8a530Spatrick 86ece8a530Spatrick // Returns object file symbols. It is a runtime error to call this 87ece8a530Spatrick // function on files of other types. getSymbols()88*05edf1c1Srobert ArrayRef<Symbol *> getSymbols() const { 89ece8a530Spatrick assert(fileKind == BinaryKind || fileKind == ObjKind || 90ece8a530Spatrick fileKind == BitcodeKind); 91*05edf1c1Srobert return {symbols.get(), numSymbols}; 92ece8a530Spatrick } 93ece8a530Spatrick 941cf9926bSpatrick // Get filename to use for linker script processing. 951cf9926bSpatrick StringRef getNameForScript() const; 961cf9926bSpatrick 97*05edf1c1Srobert // Check if a non-common symbol should be extracted to override a common 98*05edf1c1Srobert // definition. 99*05edf1c1Srobert bool shouldExtractForCommon(StringRef name); 100*05edf1c1Srobert 101*05edf1c1Srobert // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute 102*05edf1c1Srobert // offsets in PLT call stubs. 103*05edf1c1Srobert InputSection *ppc32Got2 = nullptr; 104*05edf1c1Srobert 105*05edf1c1Srobert // Index of MIPS GOT built for this file. 106*05edf1c1Srobert uint32_t mipsGotIndex = -1; 107*05edf1c1Srobert 108*05edf1c1Srobert // groupId is used for --warn-backrefs which is an optional error 109*05edf1c1Srobert // checking feature. All files within the same --{start,end}-group or 110*05edf1c1Srobert // --{start,end}-lib get the same group ID. Otherwise, each file gets a new 111*05edf1c1Srobert // group ID. For more info, see checkDependency() in SymbolTable.cpp. 112*05edf1c1Srobert uint32_t groupId; 113*05edf1c1Srobert static bool isInGroup; 114*05edf1c1Srobert static uint32_t nextGroupId; 115ece8a530Spatrick 116ece8a530Spatrick // If this is an architecture-specific file, the following members 117ece8a530Spatrick // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. 118ece8a530Spatrick uint16_t emachine = llvm::ELF::EM_NONE; 119*05edf1c1Srobert const Kind fileKind; 120*05edf1c1Srobert ELFKind ekind = ELFNoneKind; 121ece8a530Spatrick uint8_t osabi = 0; 122ece8a530Spatrick uint8_t abiVersion = 0; 123ece8a530Spatrick 124*05edf1c1Srobert // True if this is a relocatable object file/bitcode file between --start-lib 125*05edf1c1Srobert // and --end-lib. 126*05edf1c1Srobert bool lazy = false; 127ece8a530Spatrick 128ece8a530Spatrick // True if this is an argument for --just-symbols. Usually false. 129ece8a530Spatrick bool justSymbols = false; 130ece8a530Spatrick 131*05edf1c1Srobert std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, 132*05edf1c1Srobert uint64_t offset); 133ece8a530Spatrick 134ece8a530Spatrick // On PPC64 we need to keep track of which files contain small code model 135ece8a530Spatrick // relocations that access the .toc section. To minimize the chance of a 136ece8a530Spatrick // relocation overflow, files that do contain said relocations should have 137ece8a530Spatrick // their .toc sections sorted closer to the .got section than files that do 138ece8a530Spatrick // not contain any small code model relocations. Thats because the toc-pointer 139ece8a530Spatrick // is defined to point at .got + 0x8000 and the instructions used with small 140ece8a530Spatrick // code model relocations support immediates in the range [-0x8000, 0x7FFC], 141ece8a530Spatrick // making the addressable range relative to the toc pointer 142ece8a530Spatrick // [.got, .got + 0xFFFC]. 143ece8a530Spatrick bool ppc64SmallCodeModelTocRelocs = false; 144ece8a530Spatrick 1451cf9926bSpatrick // True if the file has TLSGD/TLSLD GOT relocations without R_PPC64_TLSGD or 1461cf9926bSpatrick // R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation. 1471cf9926bSpatrick bool ppc64DisableTLSRelax = false; 1481cf9926bSpatrick 149ece8a530Spatrick protected: 150ece8a530Spatrick InputFile(Kind k, MemoryBufferRef m); 151*05edf1c1Srobert 152*05edf1c1Srobert public: 153*05edf1c1Srobert // If not empty, this stores the name of the archive containing this file. 154*05edf1c1Srobert // We use this string for creating error messages. 155*05edf1c1Srobert SmallString<0> archiveName; 156*05edf1c1Srobert // Cache for toString(). Only toString() should use this member. 157*05edf1c1Srobert mutable SmallString<0> toStringCache; 158ece8a530Spatrick 159ece8a530Spatrick private: 1601cf9926bSpatrick // Cache for getNameForScript(). 161*05edf1c1Srobert mutable SmallString<0> nameForScriptCache; 162ece8a530Spatrick }; 163ece8a530Spatrick 164ece8a530Spatrick class ELFFileBase : public InputFile { 165ece8a530Spatrick public: 166*05edf1c1Srobert ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef m); classof(const InputFile * f)167ece8a530Spatrick static bool classof(const InputFile *f) { return f->isElf(); } 168ece8a530Spatrick 169*05edf1c1Srobert void init(); getObj()170ece8a530Spatrick template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const { 171ece8a530Spatrick return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer())); 172ece8a530Spatrick } 173ece8a530Spatrick getStringTable()174ece8a530Spatrick StringRef getStringTable() const { return stringTable; } 175ece8a530Spatrick getLocalSymbols()176*05edf1c1Srobert ArrayRef<Symbol *> getLocalSymbols() { 177*05edf1c1Srobert if (numSymbols == 0) 178*05edf1c1Srobert return {}; 179*05edf1c1Srobert return llvm::ArrayRef(symbols.get() + 1, firstGlobal - 1); 180*05edf1c1Srobert } getGlobalSymbols()181*05edf1c1Srobert ArrayRef<Symbol *> getGlobalSymbols() { 182*05edf1c1Srobert return llvm::ArrayRef(symbols.get() + firstGlobal, 183*05edf1c1Srobert numSymbols - firstGlobal); 184*05edf1c1Srobert } getMutableGlobalSymbols()185*05edf1c1Srobert MutableArrayRef<Symbol *> getMutableGlobalSymbols() { 186*05edf1c1Srobert return llvm::MutableArrayRef(symbols.get() + firstGlobal, 187*05edf1c1Srobert numSymbols - firstGlobal); 188*05edf1c1Srobert } 189*05edf1c1Srobert getELFShdrs()190*05edf1c1Srobert template <typename ELFT> typename ELFT::ShdrRange getELFShdrs() const { 191*05edf1c1Srobert return typename ELFT::ShdrRange( 192*05edf1c1Srobert reinterpret_cast<const typename ELFT::Shdr *>(elfShdrs), numELFShdrs); 193*05edf1c1Srobert } getELFSyms()194ece8a530Spatrick template <typename ELFT> typename ELFT::SymRange getELFSyms() const { 195ece8a530Spatrick return typename ELFT::SymRange( 196ece8a530Spatrick reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); 197ece8a530Spatrick } getGlobalELFSyms()198ece8a530Spatrick template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const { 199ece8a530Spatrick return getELFSyms<ELFT>().slice(firstGlobal); 200ece8a530Spatrick } 201ece8a530Spatrick 202ece8a530Spatrick protected: 203ece8a530Spatrick // Initializes this class's member variables. 204*05edf1c1Srobert template <typename ELFT> void init(InputFile::Kind k); 205ece8a530Spatrick 206ece8a530Spatrick StringRef stringTable; 207*05edf1c1Srobert const void *elfShdrs = nullptr; 208*05edf1c1Srobert const void *elfSyms = nullptr; 209*05edf1c1Srobert uint32_t numELFShdrs = 0; 210*05edf1c1Srobert uint32_t numELFSyms = 0; 211*05edf1c1Srobert uint32_t firstGlobal = 0; 212*05edf1c1Srobert 213*05edf1c1Srobert public: 214*05edf1c1Srobert uint32_t andFeatures = 0; 215*05edf1c1Srobert bool hasCommonSyms = false; 216ece8a530Spatrick }; 217ece8a530Spatrick 218ece8a530Spatrick // .o file. 219ece8a530Spatrick template <class ELFT> class ObjFile : public ELFFileBase { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)2201cf9926bSpatrick LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) 221ece8a530Spatrick 222ece8a530Spatrick public: 223ece8a530Spatrick static bool classof(const InputFile *f) { return f->kind() == ObjKind; } 224ece8a530Spatrick getObj()225ece8a530Spatrick llvm::object::ELFFile<ELFT> getObj() const { 226ece8a530Spatrick return this->ELFFileBase::getObj<ELFT>(); 227ece8a530Spatrick } 228ece8a530Spatrick ObjFile(ELFKind ekind,MemoryBufferRef m,StringRef archiveName)229*05edf1c1Srobert ObjFile(ELFKind ekind, MemoryBufferRef m, StringRef archiveName) 230*05edf1c1Srobert : ELFFileBase(ObjKind, ekind, m) { 231*05edf1c1Srobert this->archiveName = archiveName; 232ece8a530Spatrick } 233ece8a530Spatrick 234ece8a530Spatrick void parse(bool ignoreComdats = false); 235*05edf1c1Srobert void parseLazy(); 236ece8a530Spatrick 237ece8a530Spatrick StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 238ece8a530Spatrick const Elf_Shdr &sec); 239ece8a530Spatrick getSymbol(uint32_t symbolIndex)240ece8a530Spatrick Symbol &getSymbol(uint32_t symbolIndex) const { 241*05edf1c1Srobert if (symbolIndex >= numSymbols) 242ece8a530Spatrick fatal(toString(this) + ": invalid symbol index"); 243ece8a530Spatrick return *this->symbols[symbolIndex]; 244ece8a530Spatrick } 245ece8a530Spatrick 246ece8a530Spatrick uint32_t getSectionIndex(const Elf_Sym &sym) const; 247ece8a530Spatrick getRelocTargetSym(const RelT & rel)248ece8a530Spatrick template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const { 249ece8a530Spatrick uint32_t symIndex = rel.getSymbol(config->isMips64EL); 250ece8a530Spatrick return getSymbol(symIndex); 251ece8a530Spatrick } 252ece8a530Spatrick 253*05edf1c1Srobert std::optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); 254*05edf1c1Srobert std::optional<std::pair<std::string, unsigned>> 255*05edf1c1Srobert getVariableLoc(StringRef name); 256ece8a530Spatrick 257ece8a530Spatrick // Name of source file obtained from STT_FILE symbol value, 258ece8a530Spatrick // or empty string if there is no such symbol in object file 259ece8a530Spatrick // symbol table. 260ece8a530Spatrick StringRef sourceFile; 261ece8a530Spatrick 262*05edf1c1Srobert // Pointer to this input file's .llvm_addrsig section, if it has one. 263*05edf1c1Srobert const Elf_Shdr *addrsigSec = nullptr; 264*05edf1c1Srobert 265*05edf1c1Srobert // SHT_LLVM_CALL_GRAPH_PROFILE section index. 266*05edf1c1Srobert uint32_t cgProfileSectionIndex = 0; 267*05edf1c1Srobert 268*05edf1c1Srobert // MIPS GP0 value defined by this file. This value represents the gp value 269*05edf1c1Srobert // used to create the relocatable object and required to support 270*05edf1c1Srobert // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations. 271*05edf1c1Srobert uint32_t mipsGp0 = 0; 272*05edf1c1Srobert 273ece8a530Spatrick // True if the file defines functions compiled with 274ece8a530Spatrick // -fsplit-stack. Usually false. 275ece8a530Spatrick bool splitStack = false; 276ece8a530Spatrick 277ece8a530Spatrick // True if the file defines functions compiled with -fsplit-stack, 278ece8a530Spatrick // but had one or more functions with the no_split_stack attribute. 279ece8a530Spatrick bool someNoSplitStack = false; 280ece8a530Spatrick 281bb684c34Spatrick // Get cached DWARF information. 282bb684c34Spatrick DWARFCache *getDwarf(); 283bb684c34Spatrick 284*05edf1c1Srobert void initSectionsAndLocalSyms(bool ignoreComdats); 285*05edf1c1Srobert void postParse(); 286*05edf1c1Srobert 287ece8a530Spatrick private: 288*05edf1c1Srobert void initializeSections(bool ignoreComdats, 289*05edf1c1Srobert const llvm::object::ELFFile<ELFT> &obj); 290*05edf1c1Srobert void initializeSymbols(const llvm::object::ELFFile<ELFT> &obj); 291ece8a530Spatrick void initializeJustSymbols(); 292bb684c34Spatrick 293*05edf1c1Srobert InputSectionBase *getRelocTarget(uint32_t idx, const Elf_Shdr &sec, 294*05edf1c1Srobert uint32_t info); 295*05edf1c1Srobert InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec, 296*05edf1c1Srobert StringRef name); 297ece8a530Spatrick 298ece8a530Spatrick bool shouldMerge(const Elf_Shdr &sec, StringRef name); 299ece8a530Spatrick 300ece8a530Spatrick // Each ELF symbol contains a section index which the symbol belongs to. 301ece8a530Spatrick // However, because the number of bits dedicated for that is limited, a 302ece8a530Spatrick // symbol can directly point to a section only when the section index is 303ece8a530Spatrick // equal to or smaller than 65280. 304ece8a530Spatrick // 305ece8a530Spatrick // If an object file contains more than 65280 sections, the file must 306ece8a530Spatrick // contain .symtab_shndx section. The section contains an array of 307ece8a530Spatrick // 32-bit integers whose size is the same as the number of symbols. 308ece8a530Spatrick // Nth symbol's section index is in the Nth entry of .symtab_shndx. 309ece8a530Spatrick // 310ece8a530Spatrick // The following variable contains the contents of .symtab_shndx. 311ece8a530Spatrick // If the section does not exist (which is common), the array is empty. 312ece8a530Spatrick ArrayRef<Elf_Word> shndxTable; 313ece8a530Spatrick 314ece8a530Spatrick // Debugging information to retrieve source file and line for error 315ece8a530Spatrick // reporting. Linker may find reasonable number of errors in a 316ece8a530Spatrick // single object file, so we cache debugging information in order to 317ece8a530Spatrick // parse it only once for each object file we link. 318bb684c34Spatrick std::unique_ptr<DWARFCache> dwarf; 319bb684c34Spatrick llvm::once_flag initDwarf; 320ece8a530Spatrick }; 321ece8a530Spatrick 322ece8a530Spatrick class BitcodeFile : public InputFile { 323ece8a530Spatrick public: 324ece8a530Spatrick BitcodeFile(MemoryBufferRef m, StringRef archiveName, 325*05edf1c1Srobert uint64_t offsetInArchive, bool lazy); classof(const InputFile * f)326ece8a530Spatrick static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } 327*05edf1c1Srobert void parse(); 328*05edf1c1Srobert void parseLazy(); 329*05edf1c1Srobert void postParse(); 330ece8a530Spatrick std::unique_ptr<llvm::lto::InputFile> obj; 331*05edf1c1Srobert std::vector<bool> keptComdats; 332ece8a530Spatrick }; 333ece8a530Spatrick 334ece8a530Spatrick // .so file. 335ece8a530Spatrick class SharedFile : public ELFFileBase { 336ece8a530Spatrick public: 337*05edf1c1Srobert SharedFile(MemoryBufferRef m, StringRef defaultSoName); 338ece8a530Spatrick 339ece8a530Spatrick // This is actually a vector of Elf_Verdef pointers. 340*05edf1c1Srobert SmallVector<const void *, 0> verdefs; 341ece8a530Spatrick 342ece8a530Spatrick // If the output file needs Elf_Verneed data structures for this file, this is 343ece8a530Spatrick // a vector of Elf_Vernaux version identifiers that map onto the entries in 344ece8a530Spatrick // Verdefs, otherwise it is empty. 345*05edf1c1Srobert SmallVector<uint32_t, 0> vernauxs; 346ece8a530Spatrick 347ece8a530Spatrick static unsigned vernauxNum; 348ece8a530Spatrick 349*05edf1c1Srobert SmallVector<StringRef, 0> dtNeeded; 350*05edf1c1Srobert StringRef soName; 351ece8a530Spatrick classof(const InputFile * f)352ece8a530Spatrick static bool classof(const InputFile *f) { return f->kind() == SharedKind; } 353ece8a530Spatrick 354ece8a530Spatrick template <typename ELFT> void parse(); 355ece8a530Spatrick 356ece8a530Spatrick // Used for --as-needed 357ece8a530Spatrick bool isNeeded; 358bb684c34Spatrick 3591cf9926bSpatrick // Non-weak undefined symbols which are not yet resolved when the SO is 3601cf9926bSpatrick // parsed. Only filled for `--no-allow-shlib-undefined`. 361*05edf1c1Srobert SmallVector<Symbol *, 0> requiredSymbols; 3621cf9926bSpatrick 363bb684c34Spatrick private: 364bb684c34Spatrick template <typename ELFT> 365bb684c34Spatrick std::vector<uint32_t> parseVerneed(const llvm::object::ELFFile<ELFT> &obj, 366bb684c34Spatrick const typename ELFT::Shdr *sec); 367ece8a530Spatrick }; 368ece8a530Spatrick 369ece8a530Spatrick class BinaryFile : public InputFile { 370ece8a530Spatrick public: BinaryFile(MemoryBufferRef m)371ece8a530Spatrick explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {} classof(const InputFile * f)372ece8a530Spatrick static bool classof(const InputFile *f) { return f->kind() == BinaryKind; } 373ece8a530Spatrick void parse(); 374ece8a530Spatrick }; 375ece8a530Spatrick 376*05edf1c1Srobert ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "", 377*05edf1c1Srobert bool lazy = false); 378ece8a530Spatrick 379ece8a530Spatrick std::string replaceThinLTOSuffix(StringRef path); 380ece8a530Spatrick 381ece8a530Spatrick } // namespace elf 382ece8a530Spatrick } // namespace lld 383ece8a530Spatrick 384ece8a530Spatrick #endif 385