1 //===- Object.h -------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H 10 #define LLVM_TOOLS_OBJCOPY_OBJECT_H 11 12 #include "Buffer.h" 13 #include "CopyConfig.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/ADT/Twine.h" 17 #include "llvm/BinaryFormat/ELF.h" 18 #include "llvm/MC/StringTableBuilder.h" 19 #include "llvm/Object/ELFObjectFile.h" 20 #include "llvm/Support/Errc.h" 21 #include "llvm/Support/FileOutputBuffer.h" 22 #include <cstddef> 23 #include <cstdint> 24 #include <functional> 25 #include <memory> 26 #include <set> 27 #include <vector> 28 29 namespace llvm { 30 enum class DebugCompressionType; 31 namespace objcopy { 32 namespace elf { 33 34 class SectionBase; 35 class Section; 36 class OwnedDataSection; 37 class StringTableSection; 38 class SymbolTableSection; 39 class RelocationSection; 40 class DynamicRelocationSection; 41 class GnuDebugLinkSection; 42 class GroupSection; 43 class SectionIndexSection; 44 class CompressedSection; 45 class DecompressedSection; 46 class Segment; 47 class Object; 48 struct Symbol; 49 50 class SectionTableRef { 51 MutableArrayRef<std::unique_ptr<SectionBase>> Sections; 52 53 public: 54 using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>; 55 SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)56 explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs) 57 : Sections(Secs) {} 58 SectionTableRef(const SectionTableRef &) = default; 59 begin()60 iterator begin() const { return iterator(Sections.data()); } end()61 iterator end() const { return iterator(Sections.data() + Sections.size()); } size()62 size_t size() const { return Sections.size(); } 63 64 SectionBase *getSection(uint32_t Index, Twine ErrMsg); 65 66 template <class T> 67 T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg); 68 }; 69 70 enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE }; 71 72 class SectionVisitor { 73 public: 74 virtual ~SectionVisitor() = default; 75 76 virtual void visit(const Section &Sec) = 0; 77 virtual void visit(const OwnedDataSection &Sec) = 0; 78 virtual void visit(const StringTableSection &Sec) = 0; 79 virtual void visit(const SymbolTableSection &Sec) = 0; 80 virtual void visit(const RelocationSection &Sec) = 0; 81 virtual void visit(const DynamicRelocationSection &Sec) = 0; 82 virtual void visit(const GnuDebugLinkSection &Sec) = 0; 83 virtual void visit(const GroupSection &Sec) = 0; 84 virtual void visit(const SectionIndexSection &Sec) = 0; 85 virtual void visit(const CompressedSection &Sec) = 0; 86 virtual void visit(const DecompressedSection &Sec) = 0; 87 }; 88 89 class MutableSectionVisitor { 90 public: 91 virtual ~MutableSectionVisitor() = default; 92 93 virtual void visit(Section &Sec) = 0; 94 virtual void visit(OwnedDataSection &Sec) = 0; 95 virtual void visit(StringTableSection &Sec) = 0; 96 virtual void visit(SymbolTableSection &Sec) = 0; 97 virtual void visit(RelocationSection &Sec) = 0; 98 virtual void visit(DynamicRelocationSection &Sec) = 0; 99 virtual void visit(GnuDebugLinkSection &Sec) = 0; 100 virtual void visit(GroupSection &Sec) = 0; 101 virtual void visit(SectionIndexSection &Sec) = 0; 102 virtual void visit(CompressedSection &Sec) = 0; 103 virtual void visit(DecompressedSection &Sec) = 0; 104 }; 105 106 class SectionWriter : public SectionVisitor { 107 protected: 108 Buffer &Out; 109 110 public: 111 virtual ~SectionWriter() = default; 112 113 void visit(const Section &Sec) override; 114 void visit(const OwnedDataSection &Sec) override; 115 void visit(const StringTableSection &Sec) override; 116 void visit(const DynamicRelocationSection &Sec) override; 117 virtual void visit(const SymbolTableSection &Sec) override = 0; 118 virtual void visit(const RelocationSection &Sec) override = 0; 119 virtual void visit(const GnuDebugLinkSection &Sec) override = 0; 120 virtual void visit(const GroupSection &Sec) override = 0; 121 virtual void visit(const SectionIndexSection &Sec) override = 0; 122 virtual void visit(const CompressedSection &Sec) override = 0; 123 virtual void visit(const DecompressedSection &Sec) override = 0; 124 SectionWriter(Buffer & Buf)125 explicit SectionWriter(Buffer &Buf) : Out(Buf) {} 126 }; 127 128 template <class ELFT> class ELFSectionWriter : public SectionWriter { 129 private: 130 using Elf_Word = typename ELFT::Word; 131 using Elf_Rel = typename ELFT::Rel; 132 using Elf_Rela = typename ELFT::Rela; 133 using Elf_Sym = typename ELFT::Sym; 134 135 public: ~ELFSectionWriter()136 virtual ~ELFSectionWriter() {} 137 void visit(const SymbolTableSection &Sec) override; 138 void visit(const RelocationSection &Sec) override; 139 void visit(const GnuDebugLinkSection &Sec) override; 140 void visit(const GroupSection &Sec) override; 141 void visit(const SectionIndexSection &Sec) override; 142 void visit(const CompressedSection &Sec) override; 143 void visit(const DecompressedSection &Sec) override; 144 ELFSectionWriter(Buffer & Buf)145 explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {} 146 }; 147 148 template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor { 149 private: 150 using Elf_Rel = typename ELFT::Rel; 151 using Elf_Rela = typename ELFT::Rela; 152 using Elf_Sym = typename ELFT::Sym; 153 using Elf_Word = typename ELFT::Word; 154 using Elf_Xword = typename ELFT::Xword; 155 156 public: 157 void visit(Section &Sec) override; 158 void visit(OwnedDataSection &Sec) override; 159 void visit(StringTableSection &Sec) override; 160 void visit(DynamicRelocationSection &Sec) override; 161 void visit(SymbolTableSection &Sec) override; 162 void visit(RelocationSection &Sec) override; 163 void visit(GnuDebugLinkSection &Sec) override; 164 void visit(GroupSection &Sec) override; 165 void visit(SectionIndexSection &Sec) override; 166 void visit(CompressedSection &Sec) override; 167 void visit(DecompressedSection &Sec) override; 168 }; 169 170 #define MAKE_SEC_WRITER_FRIEND \ 171 friend class SectionWriter; \ 172 friend class IHexSectionWriterBase; \ 173 friend class IHexSectionWriter; \ 174 template <class ELFT> friend class ELFSectionWriter; \ 175 template <class ELFT> friend class ELFSectionSizer; 176 177 class BinarySectionWriter : public SectionWriter { 178 public: ~BinarySectionWriter()179 virtual ~BinarySectionWriter() {} 180 181 void visit(const SymbolTableSection &Sec) override; 182 void visit(const RelocationSection &Sec) override; 183 void visit(const GnuDebugLinkSection &Sec) override; 184 void visit(const GroupSection &Sec) override; 185 void visit(const SectionIndexSection &Sec) override; 186 void visit(const CompressedSection &Sec) override; 187 void visit(const DecompressedSection &Sec) override; 188 BinarySectionWriter(Buffer & Buf)189 explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {} 190 }; 191 192 using IHexLineData = SmallVector<char, 64>; 193 194 struct IHexRecord { 195 // Memory address of the record. 196 uint16_t Addr; 197 // Record type (see below). 198 uint16_t Type; 199 // Record data in hexadecimal form. 200 StringRef HexData; 201 202 // Helper method to get file length of the record 203 // including newline character getLengthIHexRecord204 static size_t getLength(size_t DataSize) { 205 // :LLAAAATT[DD...DD]CC' 206 return DataSize * 2 + 11; 207 } 208 209 // Gets length of line in a file (getLength + CRLF). getLineLengthIHexRecord210 static size_t getLineLength(size_t DataSize) { 211 return getLength(DataSize) + 2; 212 } 213 214 // Given type, address and data returns line which can 215 // be written to output file. 216 static IHexLineData getLine(uint8_t Type, uint16_t Addr, 217 ArrayRef<uint8_t> Data); 218 219 // Parses the line and returns record if possible. 220 // Line should be trimmed from whitespace characters. 221 static Expected<IHexRecord> parse(StringRef Line); 222 223 // Calculates checksum of stringified record representation 224 // S must NOT contain leading ':' and trailing whitespace 225 // characters 226 static uint8_t getChecksum(StringRef S); 227 228 enum Type { 229 // Contains data and a 16-bit starting address for the data. 230 // The byte count specifies number of data bytes in the record. 231 Data = 0, 232 // Must occur exactly once per file in the last line of the file. 233 // The data field is empty (thus byte count is 00) and the address 234 // field is typically 0000. 235 EndOfFile = 1, 236 // The data field contains a 16-bit segment base address (thus byte 237 // count is always 02) compatible with 80x86 real mode addressing. 238 // The address field (typically 0000) is ignored. The segment address 239 // from the most recent 02 record is multiplied by 16 and added to each 240 // subsequent data record address to form the physical starting address 241 // for the data. This allows addressing up to one megabyte of address 242 // space. 243 SegmentAddr = 2, 244 // or 80x86 processors, specifies the initial content of the CS:IP 245 // registers. The address field is 0000, the byte count is always 04, 246 // the first two data bytes are the CS value, the latter two are the 247 // IP value. 248 StartAddr80x86 = 3, 249 // Allows for 32 bit addressing (up to 4GiB). The record's address field 250 // is ignored (typically 0000) and its byte count is always 02. The two 251 // data bytes (big endian) specify the upper 16 bits of the 32 bit 252 // absolute address for all subsequent type 00 records 253 ExtendedAddr = 4, 254 // The address field is 0000 (not used) and the byte count is always 04. 255 // The four data bytes represent a 32-bit address value. In the case of 256 // 80386 and higher CPUs, this address is loaded into the EIP register. 257 StartAddr = 5, 258 // We have no other valid types 259 InvalidType = 6 260 }; 261 }; 262 263 // Base class for IHexSectionWriter. This class implements writing algorithm, 264 // but doesn't actually write records. It is used for output buffer size 265 // calculation in IHexWriter::finalize. 266 class IHexSectionWriterBase : public BinarySectionWriter { 267 // 20-bit segment address 268 uint32_t SegmentAddr = 0; 269 // Extended linear address 270 uint32_t BaseAddr = 0; 271 272 // Write segment address corresponding to 'Addr' 273 uint64_t writeSegmentAddr(uint64_t Addr); 274 // Write extended linear (base) address corresponding to 'Addr' 275 uint64_t writeBaseAddr(uint64_t Addr); 276 277 protected: 278 // Offset in the output buffer 279 uint64_t Offset = 0; 280 281 void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data); 282 virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data); 283 284 public: IHexSectionWriterBase(Buffer & Buf)285 explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {} 286 getBufferOffset()287 uint64_t getBufferOffset() const { return Offset; } 288 void visit(const Section &Sec) final; 289 void visit(const OwnedDataSection &Sec) final; 290 void visit(const StringTableSection &Sec) override; 291 void visit(const DynamicRelocationSection &Sec) final; 292 using BinarySectionWriter::visit; 293 }; 294 295 // Real IHEX section writer 296 class IHexSectionWriter : public IHexSectionWriterBase { 297 public: IHexSectionWriter(Buffer & Buf)298 IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {} 299 300 void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override; 301 void visit(const StringTableSection &Sec) override; 302 }; 303 304 class Writer { 305 protected: 306 Object &Obj; 307 Buffer &Buf; 308 309 public: 310 virtual ~Writer(); 311 virtual Error finalize() = 0; 312 virtual Error write() = 0; 313 Writer(Object & O,Buffer & B)314 Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {} 315 }; 316 317 template <class ELFT> class ELFWriter : public Writer { 318 private: 319 using Elf_Addr = typename ELFT::Addr; 320 using Elf_Shdr = typename ELFT::Shdr; 321 using Elf_Phdr = typename ELFT::Phdr; 322 using Elf_Ehdr = typename ELFT::Ehdr; 323 324 void initEhdrSegment(); 325 326 void writeEhdr(); 327 void writePhdr(const Segment &Seg); 328 void writeShdr(const SectionBase &Sec); 329 330 void writePhdrs(); 331 void writeShdrs(); 332 void writeSectionData(); 333 void writeSegmentData(); 334 335 void assignOffsets(); 336 337 std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter; 338 339 size_t totalSize() const; 340 341 public: ~ELFWriter()342 virtual ~ELFWriter() {} 343 bool WriteSectionHeaders; 344 345 // For --only-keep-debug, select an alternative section/segment layout 346 // algorithm. 347 bool OnlyKeepDebug; 348 349 Error finalize() override; 350 Error write() override; 351 ELFWriter(Object &Obj, Buffer &Buf, bool WSH, bool OnlyKeepDebug); 352 }; 353 354 class BinaryWriter : public Writer { 355 private: 356 std::unique_ptr<BinarySectionWriter> SecWriter; 357 358 uint64_t TotalSize = 0; 359 360 public: ~BinaryWriter()361 ~BinaryWriter() {} 362 Error finalize() override; 363 Error write() override; BinaryWriter(Object & Obj,Buffer & Buf)364 BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {} 365 }; 366 367 class IHexWriter : public Writer { 368 struct SectionCompare { 369 bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const; 370 }; 371 372 std::set<const SectionBase *, SectionCompare> Sections; 373 size_t TotalSize = 0; 374 375 Error checkSection(const SectionBase &Sec); 376 uint64_t writeEntryPointRecord(uint8_t *Buf); 377 uint64_t writeEndOfFileRecord(uint8_t *Buf); 378 379 public: ~IHexWriter()380 ~IHexWriter() {} 381 Error finalize() override; 382 Error write() override; IHexWriter(Object & Obj,Buffer & Buf)383 IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {} 384 }; 385 386 class SectionBase { 387 public: 388 std::string Name; 389 Segment *ParentSegment = nullptr; 390 uint64_t HeaderOffset = 0; 391 uint32_t Index = 0; 392 bool HasSymbol = false; 393 394 uint64_t OriginalFlags = 0; 395 uint64_t OriginalType = ELF::SHT_NULL; 396 uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max(); 397 398 uint64_t Addr = 0; 399 uint64_t Align = 1; 400 uint32_t EntrySize = 0; 401 uint64_t Flags = 0; 402 uint64_t Info = 0; 403 uint64_t Link = ELF::SHN_UNDEF; 404 uint64_t NameIndex = 0; 405 uint64_t Offset = 0; 406 uint64_t Size = 0; 407 uint64_t Type = ELF::SHT_NULL; 408 ArrayRef<uint8_t> OriginalData; 409 410 SectionBase() = default; 411 SectionBase(const SectionBase &) = default; 412 413 virtual ~SectionBase() = default; 414 415 virtual void initialize(SectionTableRef SecTable); 416 virtual void finalize(); 417 // Remove references to these sections. The list of sections must be sorted. 418 virtual Error 419 removeSectionReferences(bool AllowBrokenLinks, 420 function_ref<bool(const SectionBase *)> ToRemove); 421 virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove); 422 virtual void accept(SectionVisitor &Visitor) const = 0; 423 virtual void accept(MutableSectionVisitor &Visitor) = 0; 424 virtual void markSymbols(); 425 virtual void 426 replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &); 427 }; 428 429 class Segment { 430 private: 431 struct SectionCompare { operatorSectionCompare432 bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const { 433 // Some sections might have the same address if one of them is empty. To 434 // fix this we can use the lexicographic ordering on ->Addr and the 435 // address of the actully stored section. 436 if (Lhs->OriginalOffset == Rhs->OriginalOffset) 437 return Lhs < Rhs; 438 return Lhs->OriginalOffset < Rhs->OriginalOffset; 439 } 440 }; 441 442 public: 443 uint32_t Type = 0; 444 uint32_t Flags = 0; 445 uint64_t Offset = 0; 446 uint64_t VAddr = 0; 447 uint64_t PAddr = 0; 448 uint64_t FileSize = 0; 449 uint64_t MemSize = 0; 450 uint64_t Align = 0; 451 452 uint32_t Index = 0; 453 uint64_t OriginalOffset = 0; 454 Segment *ParentSegment = nullptr; 455 ArrayRef<uint8_t> Contents; 456 std::set<const SectionBase *, SectionCompare> Sections; 457 Segment(ArrayRef<uint8_t> Data)458 explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {} 459 Segment() = default; 460 firstSection()461 const SectionBase *firstSection() const { 462 if (!Sections.empty()) 463 return *Sections.begin(); 464 return nullptr; 465 } 466 removeSection(const SectionBase * Sec)467 void removeSection(const SectionBase *Sec) { Sections.erase(Sec); } addSection(const SectionBase * Sec)468 void addSection(const SectionBase *Sec) { Sections.insert(Sec); } 469 getContents()470 ArrayRef<uint8_t> getContents() const { return Contents; } 471 }; 472 473 class Section : public SectionBase { 474 MAKE_SEC_WRITER_FRIEND 475 476 ArrayRef<uint8_t> Contents; 477 SectionBase *LinkSection = nullptr; 478 479 public: Section(ArrayRef<uint8_t> Data)480 explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {} 481 482 void accept(SectionVisitor &Visitor) const override; 483 void accept(MutableSectionVisitor &Visitor) override; 484 Error removeSectionReferences(bool AllowBrokenLinks, 485 function_ref<bool(const SectionBase *)> ToRemove) override; 486 void initialize(SectionTableRef SecTable) override; 487 void finalize() override; 488 }; 489 490 class OwnedDataSection : public SectionBase { 491 MAKE_SEC_WRITER_FRIEND 492 493 std::vector<uint8_t> Data; 494 495 public: OwnedDataSection(StringRef SecName,ArrayRef<uint8_t> Data)496 OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data) 497 : Data(std::begin(Data), std::end(Data)) { 498 Name = SecName.str(); 499 Type = OriginalType = ELF::SHT_PROGBITS; 500 Size = Data.size(); 501 OriginalOffset = std::numeric_limits<uint64_t>::max(); 502 } 503 OwnedDataSection(const Twine & SecName,uint64_t SecAddr,uint64_t SecFlags,uint64_t SecOff)504 OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags, 505 uint64_t SecOff) { 506 Name = SecName.str(); 507 Type = OriginalType = ELF::SHT_PROGBITS; 508 Addr = SecAddr; 509 Flags = OriginalFlags = SecFlags; 510 OriginalOffset = SecOff; 511 } 512 513 void appendHexData(StringRef HexData); 514 void accept(SectionVisitor &Sec) const override; 515 void accept(MutableSectionVisitor &Visitor) override; 516 }; 517 518 class CompressedSection : public SectionBase { 519 MAKE_SEC_WRITER_FRIEND 520 521 DebugCompressionType CompressionType; 522 uint64_t DecompressedSize; 523 uint64_t DecompressedAlign; 524 SmallVector<char, 128> CompressedData; 525 526 public: 527 CompressedSection(const SectionBase &Sec, 528 DebugCompressionType CompressionType); 529 CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize, 530 uint64_t DecompressedAlign); 531 getDecompressedSize()532 uint64_t getDecompressedSize() const { return DecompressedSize; } getDecompressedAlign()533 uint64_t getDecompressedAlign() const { return DecompressedAlign; } 534 535 void accept(SectionVisitor &Visitor) const override; 536 void accept(MutableSectionVisitor &Visitor) override; 537 classof(const SectionBase * S)538 static bool classof(const SectionBase *S) { 539 return (S->OriginalFlags & ELF::SHF_COMPRESSED) || 540 (StringRef(S->Name).startswith(".zdebug")); 541 } 542 }; 543 544 class DecompressedSection : public SectionBase { 545 MAKE_SEC_WRITER_FRIEND 546 547 public: DecompressedSection(const CompressedSection & Sec)548 explicit DecompressedSection(const CompressedSection &Sec) 549 : SectionBase(Sec) { 550 Size = Sec.getDecompressedSize(); 551 Align = Sec.getDecompressedAlign(); 552 Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED); 553 if (StringRef(Name).startswith(".zdebug")) 554 Name = "." + Name.substr(2); 555 } 556 557 void accept(SectionVisitor &Visitor) const override; 558 void accept(MutableSectionVisitor &Visitor) override; 559 }; 560 561 // There are two types of string tables that can exist, dynamic and not dynamic. 562 // In the dynamic case the string table is allocated. Changing a dynamic string 563 // table would mean altering virtual addresses and thus the memory image. So 564 // dynamic string tables should not have an interface to modify them or 565 // reconstruct them. This type lets us reconstruct a string table. To avoid 566 // this class being used for dynamic string tables (which has happened) the 567 // classof method checks that the particular instance is not allocated. This 568 // then agrees with the makeSection method used to construct most sections. 569 class StringTableSection : public SectionBase { 570 MAKE_SEC_WRITER_FRIEND 571 572 StringTableBuilder StrTabBuilder; 573 574 public: StringTableSection()575 StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) { 576 Type = OriginalType = ELF::SHT_STRTAB; 577 } 578 579 void addString(StringRef Name); 580 uint32_t findIndex(StringRef Name) const; 581 void prepareForLayout(); 582 void accept(SectionVisitor &Visitor) const override; 583 void accept(MutableSectionVisitor &Visitor) override; 584 classof(const SectionBase * S)585 static bool classof(const SectionBase *S) { 586 if (S->OriginalFlags & ELF::SHF_ALLOC) 587 return false; 588 return S->OriginalType == ELF::SHT_STRTAB; 589 } 590 }; 591 592 // Symbols have a st_shndx field that normally stores an index but occasionally 593 // stores a different special value. This enum keeps track of what the st_shndx 594 // field means. Most of the values are just copies of the special SHN_* values. 595 // SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section. 596 enum SymbolShndxType { 597 SYMBOL_SIMPLE_INDEX = 0, 598 SYMBOL_ABS = ELF::SHN_ABS, 599 SYMBOL_COMMON = ELF::SHN_COMMON, 600 SYMBOL_LOPROC = ELF::SHN_LOPROC, 601 SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS, 602 SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON, 603 SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, 604 SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, 605 SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8, 606 SYMBOL_HIPROC = ELF::SHN_HIPROC, 607 SYMBOL_LOOS = ELF::SHN_LOOS, 608 SYMBOL_HIOS = ELF::SHN_HIOS, 609 SYMBOL_XINDEX = ELF::SHN_XINDEX, 610 }; 611 612 struct Symbol { 613 uint8_t Binding; 614 SectionBase *DefinedIn = nullptr; 615 SymbolShndxType ShndxType; 616 uint32_t Index; 617 std::string Name; 618 uint32_t NameIndex; 619 uint64_t Size; 620 uint8_t Type; 621 uint64_t Value; 622 uint8_t Visibility; 623 bool Referenced = false; 624 625 uint16_t getShndx() const; 626 bool isCommon() const; 627 }; 628 629 class SectionIndexSection : public SectionBase { 630 MAKE_SEC_WRITER_FRIEND 631 632 private: 633 std::vector<uint32_t> Indexes; 634 SymbolTableSection *Symbols = nullptr; 635 636 public: ~SectionIndexSection()637 virtual ~SectionIndexSection() {} addIndex(uint32_t Index)638 void addIndex(uint32_t Index) { 639 assert(Size > 0); 640 Indexes.push_back(Index); 641 } 642 reserve(size_t NumSymbols)643 void reserve(size_t NumSymbols) { 644 Indexes.reserve(NumSymbols); 645 Size = NumSymbols * 4; 646 } setSymTab(SymbolTableSection * SymTab)647 void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; } 648 void initialize(SectionTableRef SecTable) override; 649 void finalize() override; 650 void accept(SectionVisitor &Visitor) const override; 651 void accept(MutableSectionVisitor &Visitor) override; 652 SectionIndexSection()653 SectionIndexSection() { 654 Name = ".symtab_shndx"; 655 Align = 4; 656 EntrySize = 4; 657 Type = OriginalType = ELF::SHT_SYMTAB_SHNDX; 658 } 659 }; 660 661 class SymbolTableSection : public SectionBase { 662 MAKE_SEC_WRITER_FRIEND 663 setStrTab(StringTableSection * StrTab)664 void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; } 665 void assignIndices(); 666 667 protected: 668 std::vector<std::unique_ptr<Symbol>> Symbols; 669 StringTableSection *SymbolNames = nullptr; 670 SectionIndexSection *SectionIndexTable = nullptr; 671 672 using SymPtr = std::unique_ptr<Symbol>; 673 674 public: SymbolTableSection()675 SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; } 676 677 void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn, 678 uint64_t Value, uint8_t Visibility, uint16_t Shndx, 679 uint64_t SymbolSize); 680 void prepareForLayout(); 681 // An 'empty' symbol table still contains a null symbol. empty()682 bool empty() const { return Symbols.size() == 1; } setShndxTable(SectionIndexSection * ShndxTable)683 void setShndxTable(SectionIndexSection *ShndxTable) { 684 SectionIndexTable = ShndxTable; 685 } getShndxTable()686 const SectionIndexSection *getShndxTable() const { return SectionIndexTable; } 687 void fillShndxTable(); getStrTab()688 const SectionBase *getStrTab() const { return SymbolNames; } 689 const Symbol *getSymbolByIndex(uint32_t Index) const; 690 Symbol *getSymbolByIndex(uint32_t Index); 691 void updateSymbols(function_ref<void(Symbol &)> Callable); 692 693 Error removeSectionReferences(bool AllowBrokenLinks, 694 function_ref<bool(const SectionBase *)> ToRemove) override; 695 void initialize(SectionTableRef SecTable) override; 696 void finalize() override; 697 void accept(SectionVisitor &Visitor) const override; 698 void accept(MutableSectionVisitor &Visitor) override; 699 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; 700 void replaceSectionReferences( 701 const DenseMap<SectionBase *, SectionBase *> &FromTo) override; 702 classof(const SectionBase * S)703 static bool classof(const SectionBase *S) { 704 return S->OriginalType == ELF::SHT_SYMTAB; 705 } 706 }; 707 708 struct Relocation { 709 Symbol *RelocSymbol = nullptr; 710 uint64_t Offset; 711 uint64_t Addend; 712 uint32_t Type; 713 }; 714 715 // All relocation sections denote relocations to apply to another section. 716 // However, some relocation sections use a dynamic symbol table and others use 717 // a regular symbol table. Because the types of the two symbol tables differ in 718 // our system (because they should behave differently) we can't uniformly 719 // represent all relocations with the same base class if we expose an interface 720 // that mentions the symbol table type. So we split the two base types into two 721 // different classes, one which handles the section the relocation is applied to 722 // and another which handles the symbol table type. The symbol table type is 723 // taken as a type parameter to the class (see RelocSectionWithSymtabBase). 724 class RelocationSectionBase : public SectionBase { 725 protected: 726 SectionBase *SecToApplyRel = nullptr; 727 728 public: getSection()729 const SectionBase *getSection() const { return SecToApplyRel; } setSection(SectionBase * Sec)730 void setSection(SectionBase *Sec) { SecToApplyRel = Sec; } 731 classof(const SectionBase * S)732 static bool classof(const SectionBase *S) { 733 return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; 734 } 735 }; 736 737 // Takes the symbol table type to use as a parameter so that we can deduplicate 738 // that code between the two symbol table types. 739 template <class SymTabType> 740 class RelocSectionWithSymtabBase : public RelocationSectionBase { setSymTab(SymTabType * SymTab)741 void setSymTab(SymTabType *SymTab) { Symbols = SymTab; } 742 743 protected: 744 RelocSectionWithSymtabBase() = default; 745 746 SymTabType *Symbols = nullptr; 747 748 public: 749 void initialize(SectionTableRef SecTable) override; 750 void finalize() override; 751 }; 752 753 class RelocationSection 754 : public RelocSectionWithSymtabBase<SymbolTableSection> { 755 MAKE_SEC_WRITER_FRIEND 756 757 std::vector<Relocation> Relocations; 758 759 public: addRelocation(Relocation Rel)760 void addRelocation(Relocation Rel) { Relocations.push_back(Rel); } 761 void accept(SectionVisitor &Visitor) const override; 762 void accept(MutableSectionVisitor &Visitor) override; 763 Error removeSectionReferences(bool AllowBrokenLinks, 764 function_ref<bool(const SectionBase *)> ToRemove) override; 765 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; 766 void markSymbols() override; 767 void replaceSectionReferences( 768 const DenseMap<SectionBase *, SectionBase *> &FromTo) override; 769 classof(const SectionBase * S)770 static bool classof(const SectionBase *S) { 771 if (S->OriginalFlags & ELF::SHF_ALLOC) 772 return false; 773 return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; 774 } 775 }; 776 777 // TODO: The way stripping and groups interact is complicated 778 // and still needs to be worked on. 779 780 class GroupSection : public SectionBase { 781 MAKE_SEC_WRITER_FRIEND 782 const SymbolTableSection *SymTab = nullptr; 783 Symbol *Sym = nullptr; 784 ELF::Elf32_Word FlagWord; 785 SmallVector<SectionBase *, 3> GroupMembers; 786 787 public: 788 // TODO: Contents is present in several classes of the hierarchy. 789 // This needs to be refactored to avoid duplication. 790 ArrayRef<uint8_t> Contents; 791 GroupSection(ArrayRef<uint8_t> Data)792 explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {} 793 setSymTab(const SymbolTableSection * SymTabSec)794 void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; } setSymbol(Symbol * S)795 void setSymbol(Symbol *S) { Sym = S; } setFlagWord(ELF::Elf32_Word W)796 void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; } addMember(SectionBase * Sec)797 void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); } 798 799 void accept(SectionVisitor &) const override; 800 void accept(MutableSectionVisitor &Visitor) override; 801 void finalize() override; 802 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override; 803 void markSymbols() override; 804 void replaceSectionReferences( 805 const DenseMap<SectionBase *, SectionBase *> &FromTo) override; 806 classof(const SectionBase * S)807 static bool classof(const SectionBase *S) { 808 return S->OriginalType == ELF::SHT_GROUP; 809 } 810 }; 811 812 class DynamicSymbolTableSection : public Section { 813 public: DynamicSymbolTableSection(ArrayRef<uint8_t> Data)814 explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {} 815 classof(const SectionBase * S)816 static bool classof(const SectionBase *S) { 817 return S->OriginalType == ELF::SHT_DYNSYM; 818 } 819 }; 820 821 class DynamicSection : public Section { 822 public: DynamicSection(ArrayRef<uint8_t> Data)823 explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {} 824 classof(const SectionBase * S)825 static bool classof(const SectionBase *S) { 826 return S->OriginalType == ELF::SHT_DYNAMIC; 827 } 828 }; 829 830 class DynamicRelocationSection 831 : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> { 832 MAKE_SEC_WRITER_FRIEND 833 834 private: 835 ArrayRef<uint8_t> Contents; 836 837 public: DynamicRelocationSection(ArrayRef<uint8_t> Data)838 explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {} 839 840 void accept(SectionVisitor &) const override; 841 void accept(MutableSectionVisitor &Visitor) override; 842 Error removeSectionReferences( 843 bool AllowBrokenLinks, 844 function_ref<bool(const SectionBase *)> ToRemove) override; 845 classof(const SectionBase * S)846 static bool classof(const SectionBase *S) { 847 if (!(S->OriginalFlags & ELF::SHF_ALLOC)) 848 return false; 849 return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA; 850 } 851 }; 852 853 class GnuDebugLinkSection : public SectionBase { 854 MAKE_SEC_WRITER_FRIEND 855 856 private: 857 StringRef FileName; 858 uint32_t CRC32; 859 860 void init(StringRef File); 861 862 public: 863 // If we add this section from an external source we can use this ctor. 864 explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC); 865 void accept(SectionVisitor &Visitor) const override; 866 void accept(MutableSectionVisitor &Visitor) override; 867 }; 868 869 class Reader { 870 public: 871 virtual ~Reader(); 872 virtual std::unique_ptr<Object> create(bool EnsureSymtab) const = 0; 873 }; 874 875 using object::Binary; 876 using object::ELFFile; 877 using object::ELFObjectFile; 878 using object::OwningBinary; 879 880 class BasicELFBuilder { 881 protected: 882 std::unique_ptr<Object> Obj; 883 884 void initFileHeader(); 885 void initHeaderSegment(); 886 StringTableSection *addStrTab(); 887 SymbolTableSection *addSymTab(StringTableSection *StrTab); 888 void initSections(); 889 890 public: BasicELFBuilder()891 BasicELFBuilder() : Obj(std::make_unique<Object>()) {} 892 }; 893 894 class BinaryELFBuilder : public BasicELFBuilder { 895 MemoryBuffer *MemBuf; 896 uint8_t NewSymbolVisibility; 897 void addData(SymbolTableSection *SymTab); 898 899 public: BinaryELFBuilder(MemoryBuffer * MB,uint8_t NewSymbolVisibility)900 BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) 901 : BasicELFBuilder(), MemBuf(MB), 902 NewSymbolVisibility(NewSymbolVisibility) {} 903 904 std::unique_ptr<Object> build(); 905 }; 906 907 class IHexELFBuilder : public BasicELFBuilder { 908 const std::vector<IHexRecord> &Records; 909 910 void addDataSections(); 911 912 public: IHexELFBuilder(const std::vector<IHexRecord> & Records)913 IHexELFBuilder(const std::vector<IHexRecord> &Records) 914 : BasicELFBuilder(), Records(Records) {} 915 916 std::unique_ptr<Object> build(); 917 }; 918 919 template <class ELFT> class ELFBuilder { 920 private: 921 using Elf_Addr = typename ELFT::Addr; 922 using Elf_Shdr = typename ELFT::Shdr; 923 using Elf_Word = typename ELFT::Word; 924 925 const ELFFile<ELFT> &ElfFile; 926 Object &Obj; 927 size_t EhdrOffset = 0; 928 Optional<StringRef> ExtractPartition; 929 930 void setParentSegment(Segment &Child); 931 void readProgramHeaders(const ELFFile<ELFT> &HeadersFile); 932 void initGroupSection(GroupSection *GroupSec); 933 void initSymbolTable(SymbolTableSection *SymTab); 934 void readSectionHeaders(); 935 void readSections(bool EnsureSymtab); 936 void findEhdrOffset(); 937 SectionBase &makeSection(const Elf_Shdr &Shdr); 938 939 public: ELFBuilder(const ELFObjectFile<ELFT> & ElfObj,Object & Obj,Optional<StringRef> ExtractPartition)940 ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj, 941 Optional<StringRef> ExtractPartition) 942 : ElfFile(*ElfObj.getELFFile()), Obj(Obj), 943 ExtractPartition(ExtractPartition) {} 944 945 void build(bool EnsureSymtab); 946 }; 947 948 class BinaryReader : public Reader { 949 MemoryBuffer *MemBuf; 950 uint8_t NewSymbolVisibility; 951 952 public: BinaryReader(MemoryBuffer * MB,const uint8_t NewSymbolVisibility)953 BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) 954 : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} 955 std::unique_ptr<Object> create(bool EnsureSymtab) const override; 956 }; 957 958 class IHexReader : public Reader { 959 MemoryBuffer *MemBuf; 960 961 Expected<std::vector<IHexRecord>> parse() const; parseError(size_t LineNo,Error E)962 Error parseError(size_t LineNo, Error E) const { 963 return LineNo == -1U 964 ? createFileError(MemBuf->getBufferIdentifier(), std::move(E)) 965 : createFileError(MemBuf->getBufferIdentifier(), LineNo, 966 std::move(E)); 967 } 968 template <typename... Ts> parseError(size_t LineNo,char const * Fmt,const Ts &...Vals)969 Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const { 970 Error E = createStringError(errc::invalid_argument, Fmt, Vals...); 971 return parseError(LineNo, std::move(E)); 972 } 973 974 public: IHexReader(MemoryBuffer * MB)975 IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} 976 977 std::unique_ptr<Object> create(bool EnsureSymtab) const override; 978 }; 979 980 class ELFReader : public Reader { 981 Binary *Bin; 982 Optional<StringRef> ExtractPartition; 983 984 public: 985 std::unique_ptr<Object> create(bool EnsureSymtab) const override; ELFReader(Binary * B,Optional<StringRef> ExtractPartition)986 explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition) 987 : Bin(B), ExtractPartition(ExtractPartition) {} 988 }; 989 990 class Object { 991 private: 992 using SecPtr = std::unique_ptr<SectionBase>; 993 using SegPtr = std::unique_ptr<Segment>; 994 995 std::vector<SecPtr> Sections; 996 std::vector<SegPtr> Segments; 997 std::vector<SecPtr> RemovedSections; 998 sectionIsAlloc(const SectionBase & Sec)999 static bool sectionIsAlloc(const SectionBase &Sec) { 1000 return Sec.Flags & ELF::SHF_ALLOC; 1001 }; 1002 1003 public: 1004 template <class T> 1005 using Range = iterator_range< 1006 pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>; 1007 1008 template <class T> 1009 using ConstRange = iterator_range<pointee_iterator< 1010 typename std::vector<std::unique_ptr<T>>::const_iterator>>; 1011 1012 // It is often the case that the ELF header and the program header table are 1013 // not present in any segment. This could be a problem during file layout, 1014 // because other segments may get assigned an offset where either of the 1015 // two should reside, which will effectively corrupt the resulting binary. 1016 // Other than that we use these segments to track program header offsets 1017 // when they may not follow the ELF header. 1018 Segment ElfHdrSegment; 1019 Segment ProgramHdrSegment; 1020 1021 uint8_t OSABI; 1022 uint8_t ABIVersion; 1023 uint64_t Entry; 1024 uint64_t SHOff; 1025 uint32_t Type; 1026 uint32_t Machine; 1027 uint32_t Version; 1028 uint32_t Flags; 1029 1030 bool HadShdrs = true; 1031 bool MustBeRelocatable = false; 1032 StringTableSection *SectionNames = nullptr; 1033 SymbolTableSection *SymbolTable = nullptr; 1034 SectionIndexSection *SectionIndexTable = nullptr; 1035 1036 void sortSections(); sections()1037 SectionTableRef sections() { return SectionTableRef(Sections); } sections()1038 ConstRange<SectionBase> sections() const { 1039 return make_pointee_range(Sections); 1040 } 1041 iterator_range< 1042 filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>, 1043 decltype(§ionIsAlloc)>> allocSections()1044 allocSections() const { 1045 return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); 1046 } 1047 findSection(StringRef Name)1048 SectionBase *findSection(StringRef Name) { 1049 auto SecIt = 1050 find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); 1051 return SecIt == Sections.end() ? nullptr : SecIt->get(); 1052 } removedSections()1053 SectionTableRef removedSections() { return SectionTableRef(RemovedSections); } 1054 segments()1055 Range<Segment> segments() { return make_pointee_range(Segments); } segments()1056 ConstRange<Segment> segments() const { return make_pointee_range(Segments); } 1057 1058 Error removeSections(bool AllowBrokenLinks, 1059 std::function<bool(const SectionBase &)> ToRemove); 1060 Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove); addSection(Ts &&...Args)1061 template <class T, class... Ts> T &addSection(Ts &&... Args) { 1062 auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...); 1063 auto Ptr = Sec.get(); 1064 MustBeRelocatable |= isa<RelocationSection>(*Ptr); 1065 Sections.emplace_back(std::move(Sec)); 1066 Ptr->Index = Sections.size(); 1067 return *Ptr; 1068 } addSegment(ArrayRef<uint8_t> Data)1069 Segment &addSegment(ArrayRef<uint8_t> Data) { 1070 Segments.emplace_back(std::make_unique<Segment>(Data)); 1071 return *Segments.back(); 1072 } isRelocatable()1073 bool isRelocatable() const { 1074 return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; 1075 } 1076 }; 1077 1078 } // end namespace elf 1079 } // end namespace objcopy 1080 } // end namespace llvm 1081 1082 #endif // LLVM_TOOLS_OBJCOPY_OBJECT_H 1083