1 //===- ELFObject.h ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
10 #define LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/ADT/Twine.h"
15 #include "llvm/BinaryFormat/ELF.h"
16 #include "llvm/MC/StringTableBuilder.h"
17 #include "llvm/ObjCopy/CommonConfig.h"
18 #include "llvm/Object/ELFObjectFile.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/FileOutputBuffer.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include <cstddef>
23 #include <cstdint>
24 #include <functional>
25 #include <memory>
26 #include <set>
27 #include <vector>
28 
29 namespace llvm {
30 enum class DebugCompressionType;
31 namespace objcopy {
32 namespace elf {
33 
34 class SectionBase;
35 class Section;
36 class OwnedDataSection;
37 class StringTableSection;
38 class SymbolTableSection;
39 class RelocationSection;
40 class DynamicRelocationSection;
41 class GnuDebugLinkSection;
42 class GroupSection;
43 class SectionIndexSection;
44 class CompressedSection;
45 class DecompressedSection;
46 class Segment;
47 class Object;
48 struct Symbol;
49 
50 class SectionTableRef {
51   ArrayRef<std::unique_ptr<SectionBase>> Sections;
52 
53 public:
54   using iterator = pointee_iterator<const std::unique_ptr<SectionBase> *>;
55 
SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)56   explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)
57       : Sections(Secs) {}
58   SectionTableRef(const SectionTableRef &) = default;
59 
begin()60   iterator begin() const { return iterator(Sections.data()); }
end()61   iterator end() const { return iterator(Sections.data() + Sections.size()); }
size()62   size_t size() const { return Sections.size(); }
63 
64   Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg);
65 
66   template <class T>
67   Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg,
68                                  Twine TypeErrMsg);
69 };
70 
// The four supported ELF flavours: 32- or 64-bit, little- or big-endian.
enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
72 
// Read-only visitor interface over the concrete section classes. There is one
// pure-virtual visit() overload per section type; each receives the section
// by const reference and reports failure through the returned Error.
class SectionVisitor {
public:
  virtual ~SectionVisitor() = default;

  virtual Error visit(const Section &Sec) = 0;
  virtual Error visit(const OwnedDataSection &Sec) = 0;
  virtual Error visit(const StringTableSection &Sec) = 0;
  virtual Error visit(const SymbolTableSection &Sec) = 0;
  virtual Error visit(const RelocationSection &Sec) = 0;
  virtual Error visit(const DynamicRelocationSection &Sec) = 0;
  virtual Error visit(const GnuDebugLinkSection &Sec) = 0;
  virtual Error visit(const GroupSection &Sec) = 0;
  virtual Error visit(const SectionIndexSection &Sec) = 0;
  virtual Error visit(const CompressedSection &Sec) = 0;
  virtual Error visit(const DecompressedSection &Sec) = 0;
};
89 
// Counterpart of SectionVisitor whose visit() overloads take the section by
// non-const reference, so an implementation may modify the visited section.
// Each overload reports failure through the returned Error.
class MutableSectionVisitor {
public:
  virtual ~MutableSectionVisitor() = default;

  virtual Error visit(Section &Sec) = 0;
  virtual Error visit(OwnedDataSection &Sec) = 0;
  virtual Error visit(StringTableSection &Sec) = 0;
  virtual Error visit(SymbolTableSection &Sec) = 0;
  virtual Error visit(RelocationSection &Sec) = 0;
  virtual Error visit(DynamicRelocationSection &Sec) = 0;
  virtual Error visit(GnuDebugLinkSection &Sec) = 0;
  virtual Error visit(GroupSection &Sec) = 0;
  virtual Error visit(SectionIndexSection &Sec) = 0;
  virtual Error visit(CompressedSection &Sec) = 0;
  virtual Error visit(DecompressedSection &Sec) = 0;
};
106 
107 class SectionWriter : public SectionVisitor {
108 protected:
109   WritableMemoryBuffer &Out;
110 
111 public:
112   virtual ~SectionWriter() = default;
113 
114   Error visit(const Section &Sec) override;
115   Error visit(const OwnedDataSection &Sec) override;
116   Error visit(const StringTableSection &Sec) override;
117   Error visit(const DynamicRelocationSection &Sec) override;
118   Error visit(const SymbolTableSection &Sec) override = 0;
119   Error visit(const RelocationSection &Sec) override = 0;
120   Error visit(const GnuDebugLinkSection &Sec) override = 0;
121   Error visit(const GroupSection &Sec) override = 0;
122   Error visit(const SectionIndexSection &Sec) override = 0;
123   Error visit(const CompressedSection &Sec) override = 0;
124   Error visit(const DecompressedSection &Sec) override = 0;
125 
SectionWriter(WritableMemoryBuffer & Buf)126   explicit SectionWriter(WritableMemoryBuffer &Buf) : Out(Buf) {}
127 };
128 
129 template <class ELFT> class ELFSectionWriter : public SectionWriter {
130 private:
131   using Elf_Word = typename ELFT::Word;
132   using Elf_Rel = typename ELFT::Rel;
133   using Elf_Rela = typename ELFT::Rela;
134   using Elf_Sym = typename ELFT::Sym;
135 
136 public:
~ELFSectionWriter()137   virtual ~ELFSectionWriter() {}
138   Error visit(const SymbolTableSection &Sec) override;
139   Error visit(const RelocationSection &Sec) override;
140   Error visit(const GnuDebugLinkSection &Sec) override;
141   Error visit(const GroupSection &Sec) override;
142   Error visit(const SectionIndexSection &Sec) override;
143   Error visit(const CompressedSection &Sec) override;
144   Error visit(const DecompressedSection &Sec) override;
145 
ELFSectionWriter(WritableMemoryBuffer & Buf)146   explicit ELFSectionWriter(WritableMemoryBuffer &Buf) : SectionWriter(Buf) {}
147 };
148 
// Mutable visitor parameterized on the ELF type ELFT. It overrides every
// MutableSectionVisitor::visit() overload; the definitions are out of line.
// NOTE(review): judging by the name, this computes section sizes for the
// given ELF flavour -- confirm against the definitions.
template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
private:
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Word = typename ELFT::Word;
  using Elf_Xword = typename ELFT::Xword;

public:
  Error visit(Section &Sec) override;
  Error visit(OwnedDataSection &Sec) override;
  Error visit(StringTableSection &Sec) override;
  Error visit(DynamicRelocationSection &Sec) override;
  Error visit(SymbolTableSection &Sec) override;
  Error visit(RelocationSection &Sec) override;
  Error visit(GnuDebugLinkSection &Sec) override;
  Error visit(GroupSection &Sec) override;
  Error visit(SectionIndexSection &Sec) override;
  Error visit(CompressedSection &Sec) override;
  Error visit(DecompressedSection &Sec) override;
};
170 
// Expands to the friend declarations that grant the section writer and sizer
// classes access to a section class's non-public members. Intended to be
// placed inside a section class body.
#define MAKE_SEC_WRITER_FRIEND                                                 \
  friend class SectionWriter;                                                  \
  friend class IHexSectionWriterBase;                                          \
  friend class IHexSectionWriter;                                              \
  template <class ELFT> friend class ELFSectionWriter;                         \
  template <class ELFT> friend class ELFSectionSizer;
177 
178 class BinarySectionWriter : public SectionWriter {
179 public:
~BinarySectionWriter()180   virtual ~BinarySectionWriter() {}
181 
182   Error visit(const SymbolTableSection &Sec) override;
183   Error visit(const RelocationSection &Sec) override;
184   Error visit(const GnuDebugLinkSection &Sec) override;
185   Error visit(const GroupSection &Sec) override;
186   Error visit(const SectionIndexSection &Sec) override;
187   Error visit(const CompressedSection &Sec) override;
188   Error visit(const DecompressedSection &Sec) override;
189 
BinarySectionWriter(WritableMemoryBuffer & Buf)190   explicit BinarySectionWriter(WritableMemoryBuffer &Buf)
191       : SectionWriter(Buf) {}
192 };
193 
// Character buffer for one IHEX line, as returned by IHexRecord::getLine
// (small vector with inline storage for 64 characters).
using IHexLineData = SmallVector<char, 64>;
195 
196 struct IHexRecord {
197   // Memory address of the record.
198   uint16_t Addr;
199   // Record type (see below).
200   uint16_t Type;
201   // Record data in hexadecimal form.
202   StringRef HexData;
203 
204   // Helper method to get file length of the record
205   // including newline character
getLengthIHexRecord206   static size_t getLength(size_t DataSize) {
207     // :LLAAAATT[DD...DD]CC'
208     return DataSize * 2 + 11;
209   }
210 
211   // Gets length of line in a file (getLength + CRLF).
getLineLengthIHexRecord212   static size_t getLineLength(size_t DataSize) {
213     return getLength(DataSize) + 2;
214   }
215 
216   // Given type, address and data returns line which can
217   // be written to output file.
218   static IHexLineData getLine(uint8_t Type, uint16_t Addr,
219                               ArrayRef<uint8_t> Data);
220 
221   // Parses the line and returns record if possible.
222   // Line should be trimmed from whitespace characters.
223   static Expected<IHexRecord> parse(StringRef Line);
224 
225   // Calculates checksum of stringified record representation
226   // S must NOT contain leading ':' and trailing whitespace
227   // characters
228   static uint8_t getChecksum(StringRef S);
229 
230   enum Type {
231     // Contains data and a 16-bit starting address for the data.
232     // The byte count specifies number of data bytes in the record.
233     Data = 0,
234     // Must occur exactly once per file in the last line of the file.
235     // The data field is empty (thus byte count is 00) and the address
236     // field is typically 0000.
237     EndOfFile = 1,
238     // The data field contains a 16-bit segment base address (thus byte
239     // count is always 02) compatible with 80x86 real mode addressing.
240     // The address field (typically 0000) is ignored. The segment address
241     // from the most recent 02 record is multiplied by 16 and added to each
242     // subsequent data record address to form the physical starting address
243     // for the data. This allows addressing up to one megabyte of address
244     // space.
245     SegmentAddr = 2,
246     // or 80x86 processors, specifies the initial content of the CS:IP
247     // registers. The address field is 0000, the byte count is always 04,
248     // the first two data bytes are the CS value, the latter two are the
249     // IP value.
250     StartAddr80x86 = 3,
251     // Allows for 32 bit addressing (up to 4GiB). The record's address field
252     // is ignored (typically 0000) and its byte count is always 02. The two
253     // data bytes (big endian) specify the upper 16 bits of the 32 bit
254     // absolute address for all subsequent type 00 records
255     ExtendedAddr = 4,
256     // The address field is 0000 (not used) and the byte count is always 04.
257     // The four data bytes represent a 32-bit address value. In the case of
258     // 80386 and higher CPUs, this address is loaded into the EIP register.
259     StartAddr = 5,
260     // We have no other valid types
261     InvalidType = 6
262   };
263 };
264 
// Base class for IHexSectionWriter. This class implements writing algorithm,
// but doesn't actually write records. It is used for output buffer size
// calculation in IHexWriter::finalize.
class IHexSectionWriterBase : public BinarySectionWriter {
  // 20-bit segment address
  uint32_t SegmentAddr = 0;
  // Extended linear address
  uint32_t BaseAddr = 0;

  // Write segment address corresponding to 'Addr'
  uint64_t writeSegmentAddr(uint64_t Addr);
  // Write extended linear (base) address corresponding to 'Addr'
  uint64_t writeBaseAddr(uint64_t Addr);

protected:
  // Offset in the output buffer
  uint64_t Offset = 0;

  // Emits the contents Data of section Sec (definition is out of line).
  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
  // Hook for emitting one record of the given type, address and payload.
  // Virtual so that IHexSectionWriter can override it.
  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);

public:
  explicit IHexSectionWriterBase(WritableMemoryBuffer &Buf)
      : BinarySectionWriter(Buf) {}

  // Current value of Offset.
  uint64_t getBufferOffset() const { return Offset; }
  Error visit(const Section &Sec) final;
  Error visit(const OwnedDataSection &Sec) final;
  Error visit(const StringTableSection &Sec) override;
  Error visit(const DynamicRelocationSection &Sec) final;
  // Keep the BinarySectionWriter overloads visible alongside the ones
  // declared above.
  using BinarySectionWriter::visit;
};
297 
298 // Real IHEX section writer
299 class IHexSectionWriter : public IHexSectionWriterBase {
300 public:
IHexSectionWriter(WritableMemoryBuffer & Buf)301   IHexSectionWriter(WritableMemoryBuffer &Buf) : IHexSectionWriterBase(Buf) {}
302 
303   void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
304   Error visit(const StringTableSection &Sec) override;
305 };
306 
307 class Writer {
308 protected:
309   Object &Obj;
310   std::unique_ptr<WritableMemoryBuffer> Buf;
311   raw_ostream &Out;
312 
313 public:
314   virtual ~Writer();
315   virtual Error finalize() = 0;
316   virtual Error write() = 0;
317 
Writer(Object & O,raw_ostream & Out)318   Writer(Object &O, raw_ostream &Out) : Obj(O), Out(Out) {}
319 };
320 
321 template <class ELFT> class ELFWriter : public Writer {
322 private:
323   using Elf_Addr = typename ELFT::Addr;
324   using Elf_Shdr = typename ELFT::Shdr;
325   using Elf_Phdr = typename ELFT::Phdr;
326   using Elf_Ehdr = typename ELFT::Ehdr;
327 
328   void initEhdrSegment();
329 
330   void writeEhdr();
331   void writePhdr(const Segment &Seg);
332   void writeShdr(const SectionBase &Sec);
333 
334   void writePhdrs();
335   void writeShdrs();
336   Error writeSectionData();
337   void writeSegmentData();
338 
339   void assignOffsets();
340 
341   std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;
342 
343   size_t totalSize() const;
344 
345 public:
~ELFWriter()346   virtual ~ELFWriter() {}
347   bool WriteSectionHeaders;
348 
349   // For --only-keep-debug, select an alternative section/segment layout
350   // algorithm.
351   bool OnlyKeepDebug;
352 
353   Error finalize() override;
354   Error write() override;
355   ELFWriter(Object &Obj, raw_ostream &Out, bool WSH, bool OnlyKeepDebug);
356 };
357 
358 class BinaryWriter : public Writer {
359 private:
360   const uint8_t GapFill;
361   const uint64_t PadTo;
362   std::unique_ptr<BinarySectionWriter> SecWriter;
363 
364   uint64_t TotalSize = 0;
365 
366 public:
~BinaryWriter()367   ~BinaryWriter() {}
368   Error finalize() override;
369   Error write() override;
BinaryWriter(Object & Obj,raw_ostream & Out,const CommonConfig & Config)370   BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
371       : Writer(Obj, Out), GapFill(Config.GapFill), PadTo(Config.PadTo) {}
372 };
373 
374 class IHexWriter : public Writer {
375   struct SectionCompare {
376     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
377   };
378 
379   std::set<const SectionBase *, SectionCompare> Sections;
380   size_t TotalSize = 0;
381 
382   Error checkSection(const SectionBase &Sec);
383   uint64_t writeEntryPointRecord(uint8_t *Buf);
384   uint64_t writeEndOfFileRecord(uint8_t *Buf);
385 
386 public:
~IHexWriter()387   ~IHexWriter() {}
388   Error finalize() override;
389   Error write() override;
IHexWriter(Object & Obj,raw_ostream & Out)390   IHexWriter(Object &Obj, raw_ostream &Out) : Writer(Obj, Out) {}
391 };
392 
// Polymorphic base of all section classes. Carries the section header fields
// shared by every section, remembers a few values under Original* names, and
// declares the virtual hooks that derived sections customize.
class SectionBase {
public:
  std::string Name;
  Segment *ParentSegment = nullptr;
  uint64_t HeaderOffset = 0;
  uint32_t Index = 0;

  // Values kept under Original* names, separately from the mutable fields
  // below. OriginalOffset defaults to the maximum uint64_t value.
  uint32_t OriginalIndex = 0;
  uint64_t OriginalFlags = 0;
  uint64_t OriginalType = ELF::SHT_NULL;
  uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();

  uint64_t Addr = 0;
  uint64_t Align = 1;
  uint32_t EntrySize = 0;
  uint64_t Flags = 0;
  uint64_t Info = 0;
  uint64_t Link = ELF::SHN_UNDEF;
  uint64_t NameIndex = 0;
  uint64_t Offset = 0;
  uint64_t Size = 0;
  uint64_t Type = ELF::SHT_NULL;
  ArrayRef<uint8_t> OriginalData;
  bool HasSymbol = false;

  SectionBase() = default;
  SectionBase(const SectionBase &) = default;

  virtual ~SectionBase() = default;

  virtual Error initialize(SectionTableRef SecTable);
  virtual void finalize();
  // Remove references to these sections. The list of sections must be sorted.
  virtual Error
  removeSectionReferences(bool AllowBrokenLinks,
                          function_ref<bool(const SectionBase *)> ToRemove);
  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
  // Double-dispatch entry points for the two visitor interfaces.
  virtual Error accept(SectionVisitor &Visitor) const = 0;
  virtual Error accept(MutableSectionVisitor &Visitor) = 0;
  virtual void markSymbols();
  virtual void
  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
  // Returns false in this base class; derived classes override it.
  virtual bool hasContents() const { return false; }
  // Notify the section that it is subject to removal.
  virtual void onRemove();

  // No-op in this base class; Section overrides it.
  virtual void restoreSymTabLink(SymbolTableSection &) {}
};
441 
442 class Segment {
443 private:
444   struct SectionCompare {
operatorSectionCompare445     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
446       // Some sections might have the same address if one of them is empty. To
447       // fix this we can use the lexicographic ordering on ->Addr and the
448       // original index.
449       if (Lhs->OriginalOffset == Rhs->OriginalOffset)
450         return Lhs->OriginalIndex < Rhs->OriginalIndex;
451       return Lhs->OriginalOffset < Rhs->OriginalOffset;
452     }
453   };
454 
455 public:
456   uint32_t Type = 0;
457   uint32_t Flags = 0;
458   uint64_t Offset = 0;
459   uint64_t VAddr = 0;
460   uint64_t PAddr = 0;
461   uint64_t FileSize = 0;
462   uint64_t MemSize = 0;
463   uint64_t Align = 0;
464 
465   uint32_t Index = 0;
466   uint64_t OriginalOffset = 0;
467   Segment *ParentSegment = nullptr;
468   ArrayRef<uint8_t> Contents;
469   std::set<const SectionBase *, SectionCompare> Sections;
470 
Segment(ArrayRef<uint8_t> Data)471   explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
472   Segment() = default;
473 
firstSection()474   const SectionBase *firstSection() const {
475     if (!Sections.empty())
476       return *Sections.begin();
477     return nullptr;
478   }
479 
removeSection(const SectionBase * Sec)480   void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
addSection(const SectionBase * Sec)481   void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
482 
getContents()483   ArrayRef<uint8_t> getContents() const { return Contents; }
484 };
485 
486 class Section : public SectionBase {
487   MAKE_SEC_WRITER_FRIEND
488 
489   ArrayRef<uint8_t> Contents;
490   SectionBase *LinkSection = nullptr;
491   bool HasSymTabLink = false;
492 
493 public:
Section(ArrayRef<uint8_t> Data)494   explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
495 
496   Error accept(SectionVisitor &Visitor) const override;
497   Error accept(MutableSectionVisitor &Visitor) override;
498   Error removeSectionReferences(
499       bool AllowBrokenLinks,
500       function_ref<bool(const SectionBase *)> ToRemove) override;
501   Error initialize(SectionTableRef SecTable) override;
502   void finalize() override;
hasContents()503   bool hasContents() const override {
504     return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
505   }
506   void restoreSymTabLink(SymbolTableSection &SymTab) override;
507 };
508 
509 class OwnedDataSection : public SectionBase {
510   MAKE_SEC_WRITER_FRIEND
511 
512   std::vector<uint8_t> Data;
513 
514 public:
OwnedDataSection(StringRef SecName,ArrayRef<uint8_t> Data)515   OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
516       : Data(std::begin(Data), std::end(Data)) {
517     Name = SecName.str();
518     Type = OriginalType = ELF::SHT_PROGBITS;
519     Size = Data.size();
520     OriginalOffset = std::numeric_limits<uint64_t>::max();
521   }
522 
OwnedDataSection(const Twine & SecName,uint64_t SecAddr,uint64_t SecFlags,uint64_t SecOff)523   OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
524                    uint64_t SecOff) {
525     Name = SecName.str();
526     Type = OriginalType = ELF::SHT_PROGBITS;
527     Addr = SecAddr;
528     Flags = OriginalFlags = SecFlags;
529     OriginalOffset = SecOff;
530   }
531 
OwnedDataSection(SectionBase & S,ArrayRef<uint8_t> Data)532   OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
533       : SectionBase(S), Data(std::begin(Data), std::end(Data)) {
534     Size = Data.size();
535   }
536 
537   void appendHexData(StringRef HexData);
538   Error accept(SectionVisitor &Sec) const override;
539   Error accept(MutableSectionVisitor &Visitor) override;
hasContents()540   bool hasContents() const override { return true; }
541 };
542 
543 class CompressedSection : public SectionBase {
544   MAKE_SEC_WRITER_FRIEND
545 
546   uint32_t ChType = 0;
547   DebugCompressionType CompressionType;
548   uint64_t DecompressedSize;
549   uint64_t DecompressedAlign;
550   SmallVector<uint8_t, 128> CompressedData;
551 
552 public:
553   CompressedSection(const SectionBase &Sec,
554     DebugCompressionType CompressionType, bool Is64Bits);
555   CompressedSection(ArrayRef<uint8_t> CompressedData, uint32_t ChType,
556                     uint64_t DecompressedSize, uint64_t DecompressedAlign);
557 
getDecompressedSize()558   uint64_t getDecompressedSize() const { return DecompressedSize; }
getDecompressedAlign()559   uint64_t getDecompressedAlign() const { return DecompressedAlign; }
getChType()560   uint64_t getChType() const { return ChType; }
561 
562   Error accept(SectionVisitor &Visitor) const override;
563   Error accept(MutableSectionVisitor &Visitor) override;
564 
classof(const SectionBase * S)565   static bool classof(const SectionBase *S) {
566     return S->OriginalFlags & ELF::SHF_COMPRESSED;
567   }
568 };
569 
570 class DecompressedSection : public SectionBase {
571   MAKE_SEC_WRITER_FRIEND
572 
573 public:
574   uint32_t ChType;
DecompressedSection(const CompressedSection & Sec)575   explicit DecompressedSection(const CompressedSection &Sec)
576       : SectionBase(Sec), ChType(Sec.getChType()) {
577     Size = Sec.getDecompressedSize();
578     Align = Sec.getDecompressedAlign();
579     Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
580   }
581 
582   Error accept(SectionVisitor &Visitor) const override;
583   Error accept(MutableSectionVisitor &Visitor) override;
584 };
585 
586 // There are two types of string tables that can exist, dynamic and not dynamic.
587 // In the dynamic case the string table is allocated. Changing a dynamic string
588 // table would mean altering virtual addresses and thus the memory image. So
589 // dynamic string tables should not have an interface to modify them or
590 // reconstruct them. This type lets us reconstruct a string table. To avoid
591 // this class being used for dynamic string tables (which has happened) the
592 // classof method checks that the particular instance is not allocated. This
593 // then agrees with the makeSection method used to construct most sections.
594 class StringTableSection : public SectionBase {
595   MAKE_SEC_WRITER_FRIEND
596 
597   StringTableBuilder StrTabBuilder;
598 
599 public:
StringTableSection()600   StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
601     Type = OriginalType = ELF::SHT_STRTAB;
602   }
603 
604   void addString(StringRef Name);
605   uint32_t findIndex(StringRef Name) const;
606   void prepareForLayout();
607   Error accept(SectionVisitor &Visitor) const override;
608   Error accept(MutableSectionVisitor &Visitor) override;
609 
classof(const SectionBase * S)610   static bool classof(const SectionBase *S) {
611     if (S->OriginalFlags & ELF::SHF_ALLOC)
612       return false;
613     return S->OriginalType == ELF::SHT_STRTAB;
614   }
615 };
616 
// Symbols have a st_shndx field that normally stores an index but occasionally
// stores a different special value. This enum keeps track of what the st_shndx
// field means. Most of the values are just copies of the special SHN_* values.
// SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section.
enum SymbolShndxType {
  SYMBOL_SIMPLE_INDEX = 0,
  SYMBOL_ABS = ELF::SHN_ABS,
  SYMBOL_COMMON = ELF::SHN_COMMON,
  SYMBOL_LOPROC = ELF::SHN_LOPROC,
  SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
  SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
  SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
  SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
  SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
  SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON,
  SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT,
  SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA,
  SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON,
  SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED,
  SYMBOL_HIPROC = ELF::SHN_HIPROC,
  SYMBOL_LOOS = ELF::SHN_LOOS,
  SYMBOL_HIOS = ELF::SHN_HIOS,
  SYMBOL_XINDEX = ELF::SHN_XINDEX,
};
641 
642 struct Symbol {
643   uint8_t Binding;
644   SectionBase *DefinedIn = nullptr;
645   SymbolShndxType ShndxType;
646   uint32_t Index;
647   std::string Name;
648   uint32_t NameIndex;
649   uint64_t Size;
650   uint8_t Type;
651   uint64_t Value;
652   uint8_t Visibility;
653   bool Referenced = false;
654 
655   uint16_t getShndx() const;
656   bool isCommon() const;
657 };
658 
659 class SectionIndexSection : public SectionBase {
660   MAKE_SEC_WRITER_FRIEND
661 
662 private:
663   std::vector<uint32_t> Indexes;
664   SymbolTableSection *Symbols = nullptr;
665 
666 public:
~SectionIndexSection()667   virtual ~SectionIndexSection() {}
addIndex(uint32_t Index)668   void addIndex(uint32_t Index) {
669     assert(Size > 0);
670     Indexes.push_back(Index);
671   }
672 
reserve(size_t NumSymbols)673   void reserve(size_t NumSymbols) {
674     Indexes.reserve(NumSymbols);
675     Size = NumSymbols * 4;
676   }
setSymTab(SymbolTableSection * SymTab)677   void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
678   Error initialize(SectionTableRef SecTable) override;
679   void finalize() override;
680   Error accept(SectionVisitor &Visitor) const override;
681   Error accept(MutableSectionVisitor &Visitor) override;
682 
SectionIndexSection()683   SectionIndexSection() {
684     Name = ".symtab_shndx";
685     Align = 4;
686     EntrySize = 4;
687     Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
688   }
689 };
690 
691 class SymbolTableSection : public SectionBase {
692   MAKE_SEC_WRITER_FRIEND
693 
setStrTab(StringTableSection * StrTab)694   void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
695   void assignIndices();
696 
697 protected:
698   std::vector<std::unique_ptr<Symbol>> Symbols;
699   StringTableSection *SymbolNames = nullptr;
700   SectionIndexSection *SectionIndexTable = nullptr;
701   bool IndicesChanged = false;
702 
703   using SymPtr = std::unique_ptr<Symbol>;
704 
705 public:
SymbolTableSection()706   SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }
707 
708   void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
709                  uint64_t Value, uint8_t Visibility, uint16_t Shndx,
710                  uint64_t SymbolSize);
711   void prepareForLayout();
712   // An 'empty' symbol table still contains a null symbol.
empty()713   bool empty() const { return Symbols.size() == 1; }
indicesChanged()714   bool indicesChanged() const { return IndicesChanged; }
setShndxTable(SectionIndexSection * ShndxTable)715   void setShndxTable(SectionIndexSection *ShndxTable) {
716     SectionIndexTable = ShndxTable;
717   }
getShndxTable()718   const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
719   void fillShndxTable();
getStrTab()720   const SectionBase *getStrTab() const { return SymbolNames; }
721   Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const;
722   Expected<Symbol *> getSymbolByIndex(uint32_t Index);
723   void updateSymbols(function_ref<void(Symbol &)> Callable);
724 
725   Error removeSectionReferences(
726       bool AllowBrokenLinks,
727       function_ref<bool(const SectionBase *)> ToRemove) override;
728   Error initialize(SectionTableRef SecTable) override;
729   void finalize() override;
730   Error accept(SectionVisitor &Visitor) const override;
731   Error accept(MutableSectionVisitor &Visitor) override;
732   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
733   void replaceSectionReferences(
734       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
735 
classof(const SectionBase * S)736   static bool classof(const SectionBase *S) {
737     return S->OriginalType == ELF::SHT_SYMTAB;
738   }
739 };
740 
741 struct Relocation {
742   Symbol *RelocSymbol = nullptr;
743   uint64_t Offset;
744   uint64_t Addend;
745   uint32_t Type;
746 };
747 
748 // All relocation sections denote relocations to apply to another section.
749 // However, some relocation sections use a dynamic symbol table and others use
750 // a regular symbol table. Because the types of the two symbol tables differ in
751 // our system (because they should behave differently) we can't uniformly
752 // represent all relocations with the same base class if we expose an interface
753 // that mentions the symbol table type. So we split the two base types into two
754 // different classes, one which handles the section the relocation is applied to
755 // and another which handles the symbol table type. The symbol table type is
756 // taken as a type parameter to the class (see RelocSectionWithSymtabBase).
757 class RelocationSectionBase : public SectionBase {
758 protected:
759   SectionBase *SecToApplyRel = nullptr;
760 
761 public:
getSection()762   const SectionBase *getSection() const { return SecToApplyRel; }
setSection(SectionBase * Sec)763   void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
764 
765   StringRef getNamePrefix() const;
766 
classof(const SectionBase * S)767   static bool classof(const SectionBase *S) {
768     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
769   }
770 };
771 
// Takes the symbol table type to use as a parameter so that we can deduplicate
// that code between the two symbol table types.
template <class SymTabType>
class RelocSectionWithSymtabBase : public RelocationSectionBase {
  // Private setter for the associated symbol table.
  void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }

protected:
  // Constructible only by derived classes.
  RelocSectionWithSymtabBase() = default;

  // Associated symbol table; nullptr until one is set.
  SymTabType *Symbols = nullptr;

public:
  Error initialize(SectionTableRef SecTable) override;
  void finalize() override;
};
787 
788 class RelocationSection
789     : public RelocSectionWithSymtabBase<SymbolTableSection> {
790   MAKE_SEC_WRITER_FRIEND
791 
792   std::vector<Relocation> Relocations;
793   const Object &Obj;
794 
795 public:
RelocationSection(const Object & O)796   RelocationSection(const Object &O) : Obj(O) {}
addRelocation(Relocation Rel)797   void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
798   Error accept(SectionVisitor &Visitor) const override;
799   Error accept(MutableSectionVisitor &Visitor) override;
800   Error removeSectionReferences(
801       bool AllowBrokenLinks,
802       function_ref<bool(const SectionBase *)> ToRemove) override;
803   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
804   void markSymbols() override;
805   void replaceSectionReferences(
806       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
getObject()807   const Object &getObject() const { return Obj; }
808 
classof(const SectionBase * S)809   static bool classof(const SectionBase *S) {
810     if (S->OriginalFlags & ELF::SHF_ALLOC)
811       return false;
812     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
813   }
814 };
815 
816 // TODO: The way stripping and groups interact is complicated
817 // and still needs to be worked on.
818 
819 class GroupSection : public SectionBase {
820   MAKE_SEC_WRITER_FRIEND
821   const SymbolTableSection *SymTab = nullptr;
822   Symbol *Sym = nullptr;
823   ELF::Elf32_Word FlagWord;
824   SmallVector<SectionBase *, 3> GroupMembers;
825 
826 public:
827   // TODO: Contents is present in several classes of the hierarchy.
828   // This needs to be refactored to avoid duplication.
829   ArrayRef<uint8_t> Contents;
830 
GroupSection(ArrayRef<uint8_t> Data)831   explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
832 
setSymTab(const SymbolTableSection * SymTabSec)833   void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
setSymbol(Symbol * S)834   void setSymbol(Symbol *S) { Sym = S; }
setFlagWord(ELF::Elf32_Word W)835   void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
addMember(SectionBase * Sec)836   void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
837 
838   Error accept(SectionVisitor &) const override;
839   Error accept(MutableSectionVisitor &Visitor) override;
840   void finalize() override;
841   Error removeSectionReferences(
842       bool AllowBrokenLinks,
843       function_ref<bool(const SectionBase *)> ToRemove) override;
844   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
845   void markSymbols() override;
846   void replaceSectionReferences(
847       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
848   void onRemove() override;
849 
classof(const SectionBase * S)850   static bool classof(const SectionBase *S) {
851     return S->OriginalType == ELF::SHT_GROUP;
852   }
853 };
854 
855 class DynamicSymbolTableSection : public Section {
856 public:
DynamicSymbolTableSection(ArrayRef<uint8_t> Data)857   explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
858 
classof(const SectionBase * S)859   static bool classof(const SectionBase *S) {
860     return S->OriginalType == ELF::SHT_DYNSYM;
861   }
862 };
863 
864 class DynamicSection : public Section {
865 public:
DynamicSection(ArrayRef<uint8_t> Data)866   explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
867 
classof(const SectionBase * S)868   static bool classof(const SectionBase *S) {
869     return S->OriginalType == ELF::SHT_DYNAMIC;
870   }
871 };
872 
873 class DynamicRelocationSection
874     : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
875   MAKE_SEC_WRITER_FRIEND
876 
877 private:
878   ArrayRef<uint8_t> Contents;
879 
880 public:
DynamicRelocationSection(ArrayRef<uint8_t> Data)881   explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
882 
883   Error accept(SectionVisitor &) const override;
884   Error accept(MutableSectionVisitor &Visitor) override;
885   Error removeSectionReferences(
886       bool AllowBrokenLinks,
887       function_ref<bool(const SectionBase *)> ToRemove) override;
888 
classof(const SectionBase * S)889   static bool classof(const SectionBase *S) {
890     if (!(S->OriginalFlags & ELF::SHF_ALLOC))
891       return false;
892     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
893   }
894 };
895 
// Section that carries a file name together with a 32-bit CRC value.
// NOTE(review): presumably models the .gnu_debuglink section -- confirm against
// the out-of-line definitions.
class GnuDebugLinkSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

private:
  // Non-owning view of the file name.
  StringRef FileName;
  // CRC value for this section; no in-class default, so it must be set by the
  // constructor (defined out of line).
  uint32_t CRC32;

  // Private setup helper taking the file name; defined out of line.
  void init(StringRef File);

public:
  // If we add this section from an external source we can use this ctor.
  explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
  // Visitor entry points (const and mutable flavours).
  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
};
911 
// Abstract interface for anything that can produce an Object.
class Reader {
public:
  // Virtual so that implementations can be destroyed through a Reader pointer;
  // defined out of line.
  virtual ~Reader();
  // Builds and returns a new Object, or an Error on failure.
  // NOTE(review): EnsureSymtab presumably requests that the result have a
  // symbol table -- confirm in the implementations.
  virtual Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const = 0;
};
917 
918 using object::Binary;
919 using object::ELFFile;
920 using object::ELFObjectFile;
921 using object::OwningBinary;
922 
// Common base for builders that start from a freshly created Object. The
// helper steps below are declared here and defined out of line.
class BasicELFBuilder {
protected:
  // The Object under construction; owned by the builder.
  std::unique_ptr<Object> Obj;

  // Building-block steps available to derived builders.
  void initFileHeader();
  void initHeaderSegment();
  StringTableSection *addStrTab();
  SymbolTableSection *addSymTab(StringTableSection *StrTab);
  Error initSections();

public:
  // Allocates a default-constructed Object to be filled in.
  BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
};
936 
// Builder that produces an Object from the contents of a MemoryBuffer.
class BinaryELFBuilder : public BasicELFBuilder {
  // Non-owning pointer to the input buffer.
  MemoryBuffer *MemBuf;
  // Visibility value supplied by the caller.
  // NOTE(review): presumably applied to symbols created in addData() -- confirm.
  uint8_t NewSymbolVisibility;
  // Private helper taking the symbol table to populate; defined out of line.
  void addData(SymbolTableSection *SymTab);

public:
  // Stores both arguments; the buffer pointer is kept as-is (not copied).
  BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility)
      : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}

  // Returns the built Object, or an Error on failure.
  Expected<std::unique_ptr<Object>> build();
};
948 
// Builder that produces an Object from a list of IHexRecord entries.
class IHexELFBuilder : public BasicELFBuilder {
  // Held by reference, so the vector passed to the constructor must outlive
  // this builder.
  const std::vector<IHexRecord> &Records;

  // Private helper; defined out of line.
  void addDataSections();

public:
  // Binds to Records without copying it.
  IHexELFBuilder(const std::vector<IHexRecord> &Records) : Records(Records) {}

  // Returns the built Object, or an Error on failure.
  Expected<std::unique_ptr<Object>> build();
};
959 
// Populates an existing Object from an ELF file of the given ELFT flavour.
// All member functions are defined out of line.
template <class ELFT> class ELFBuilder {
private:
  // Shorthands for the ELFT-dependent types used by the implementation.
  using Elf_Addr = typename ELFT::Addr;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Word = typename ELFT::Word;

  // Input file and output object; both are references, so they must outlive
  // the builder.
  const ELFFile<ELFT> &ElfFile;
  Object &Obj;
  // Offset of the ELF header; defaults to 0.
  // NOTE(review): presumably updated by findEhdrOffset() -- confirm.
  size_t EhdrOffset = 0;
  // Optional partition name supplied to the constructor.
  std::optional<StringRef> ExtractPartition;

  // Internal build steps.
  void setParentSegment(Segment &Child);
  Error readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
  Error initGroupSection(GroupSection *GroupSec);
  Error initSymbolTable(SymbolTableSection *SymTab);
  Error readSectionHeaders();
  Error readSections(bool EnsureSymtab);
  Error findEhdrOffset();
  Expected<SectionBase &> makeSection(const Elf_Shdr &Shdr);

public:
  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
             std::optional<StringRef> ExtractPartition);

  // Entry point; reports failure through the returned Error.
  Error build(bool EnsureSymtab);
};
986 
// Reader implementation constructed from a MemoryBuffer and a symbol
// visibility value.
class BinaryReader : public Reader {
  // Non-owning pointer to the input buffer.
  MemoryBuffer *MemBuf;
  // Visibility value supplied by the caller.
  uint8_t NewSymbolVisibility;

public:
  // Stores both arguments; the buffer is not copied.
  BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
      : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
  // Reader interface; defined out of line.
  Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
};
996 
997 class IHexReader : public Reader {
998   MemoryBuffer *MemBuf;
999 
1000   Expected<std::vector<IHexRecord>> parse() const;
parseError(size_t LineNo,Error E)1001   Error parseError(size_t LineNo, Error E) const {
1002     return LineNo == -1U
1003                ? createFileError(MemBuf->getBufferIdentifier(), std::move(E))
1004                : createFileError(MemBuf->getBufferIdentifier(), LineNo,
1005                                  std::move(E));
1006   }
1007   template <typename... Ts>
parseError(size_t LineNo,char const * Fmt,const Ts &...Vals)1008   Error parseError(size_t LineNo, char const *Fmt, const Ts &...Vals) const {
1009     Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
1010     return parseError(LineNo, std::move(E));
1011   }
1012 
1013 public:
IHexReader(MemoryBuffer * MB)1014   IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
1015 
1016   Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
1017 };
1018 
// Reader implementation constructed from a Binary and an optional partition
// name.
class ELFReader : public Reader {
  // Non-owning pointer to the input binary.
  Binary *Bin;
  // Optional partition name supplied by the caller.
  std::optional<StringRef> ExtractPartition;

public:
  // Reader interface; defined out of line.
  Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
  // Stores both arguments; the binary is not copied.
  explicit ELFReader(Binary *B, std::optional<StringRef> ExtractPartition)
      : Bin(B), ExtractPartition(ExtractPartition) {}
};
1028 
1029 class Object {
1030 private:
1031   using SecPtr = std::unique_ptr<SectionBase>;
1032   using SegPtr = std::unique_ptr<Segment>;
1033 
1034   std::vector<SecPtr> Sections;
1035   std::vector<SegPtr> Segments;
1036   std::vector<SecPtr> RemovedSections;
1037   DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
1038 
sectionIsAlloc(const SectionBase & Sec)1039   static bool sectionIsAlloc(const SectionBase &Sec) {
1040     return Sec.Flags & ELF::SHF_ALLOC;
1041   };
1042 
1043 public:
1044   template <class T>
1045   using ConstRange = iterator_range<pointee_iterator<
1046       typename std::vector<std::unique_ptr<T>>::const_iterator>>;
1047 
1048   // It is often the case that the ELF header and the program header table are
1049   // not present in any segment. This could be a problem during file layout,
1050   // because other segments may get assigned an offset where either of the
1051   // two should reside, which will effectively corrupt the resulting binary.
1052   // Other than that we use these segments to track program header offsets
1053   // when they may not follow the ELF header.
1054   Segment ElfHdrSegment;
1055   Segment ProgramHdrSegment;
1056 
1057   bool Is64Bits;
1058   uint8_t OSABI;
1059   uint8_t ABIVersion;
1060   uint64_t Entry;
1061   uint64_t SHOff;
1062   uint32_t Type;
1063   uint32_t Machine;
1064   uint32_t Version;
1065   uint32_t Flags;
1066 
1067   bool HadShdrs = true;
1068   bool MustBeRelocatable = false;
1069   StringTableSection *SectionNames = nullptr;
1070   SymbolTableSection *SymbolTable = nullptr;
1071   SectionIndexSection *SectionIndexTable = nullptr;
1072 
1073   bool IsMips64EL = false;
1074 
sections()1075   SectionTableRef sections() const { return SectionTableRef(Sections); }
1076   iterator_range<
1077       filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
1078                       decltype(&sectionIsAlloc)>>
allocSections()1079   allocSections() const {
1080     return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
1081   }
1082 
getUpdatedSections()1083   const auto &getUpdatedSections() const { return UpdatedSections; }
1084   Error updateSection(StringRef Name, ArrayRef<uint8_t> Data);
1085 
findSection(StringRef Name)1086   SectionBase *findSection(StringRef Name) {
1087     auto SecIt =
1088         find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
1089     return SecIt == Sections.end() ? nullptr : SecIt->get();
1090   }
removedSections()1091   SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
1092 
segments()1093   ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
1094 
1095   Error removeSections(bool AllowBrokenLinks,
1096                        std::function<bool(const SectionBase &)> ToRemove);
1097   Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo);
1098   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
addSection(Ts &&...Args)1099   template <class T, class... Ts> T &addSection(Ts &&...Args) {
1100     auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
1101     auto Ptr = Sec.get();
1102     MustBeRelocatable |= isa<RelocationSection>(*Ptr);
1103     Sections.emplace_back(std::move(Sec));
1104     Ptr->Index = Sections.size();
1105     return *Ptr;
1106   }
1107   Error addNewSymbolTable();
addSegment(ArrayRef<uint8_t> Data)1108   Segment &addSegment(ArrayRef<uint8_t> Data) {
1109     Segments.emplace_back(std::make_unique<Segment>(Data));
1110     return *Segments.back();
1111   }
isRelocatable()1112   bool isRelocatable() const {
1113     return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
1114   }
1115 };
1116 
1117 } // end namespace elf
1118 } // end namespace objcopy
1119 } // end namespace llvm
1120 
1121 #endif // LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
1122