1 //===- Object.h -------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
10 #define LLVM_TOOLS_OBJCOPY_OBJECT_H
11 
12 #include "Buffer.h"
13 #include "CopyConfig.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/BinaryFormat/ELF.h"
18 #include "llvm/MC/StringTableBuilder.h"
19 #include "llvm/Object/ELFObjectFile.h"
20 #include "llvm/Support/Errc.h"
21 #include "llvm/Support/FileOutputBuffer.h"
22 #include <cstddef>
23 #include <cstdint>
24 #include <functional>
25 #include <memory>
26 #include <set>
27 #include <vector>
28 
29 namespace llvm {
30 enum class DebugCompressionType;
31 namespace objcopy {
32 namespace elf {
33 
// Forward declarations of the section, segment, object and symbol types that
// are referred to by pointer or reference before their definitions below.
class SectionBase;
class Section;
class OwnedDataSection;
class StringTableSection;
class SymbolTableSection;
class RelocationSection;
class DynamicRelocationSection;
class GnuDebugLinkSection;
class GroupSection;
class SectionIndexSection;
class CompressedSection;
class DecompressedSection;
class Segment;
class Object;
struct Symbol;
49 
50 class SectionTableRef {
51   MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
52 
53 public:
54   using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
55 
SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)56   explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)
57       : Sections(Secs) {}
58   SectionTableRef(const SectionTableRef &) = default;
59 
begin()60   iterator begin() const { return iterator(Sections.data()); }
end()61   iterator end() const { return iterator(Sections.data() + Sections.size()); }
size()62   size_t size() const { return Sections.size(); }
63 
64   Expected<SectionBase *> getSection(uint32_t Index, Twine ErrMsg);
65 
66   template <class T>
67   Expected<T *> getSectionOfType(uint32_t Index, Twine IndexErrMsg,
68                                  Twine TypeErrMsg);
69 };
70 
// Identifies an ELF flavour. The enumerator names encode the class (32 or 64)
// and the byte order (LE or BE).
enum ElfType {
  ELFT_ELF32LE,
  ELFT_ELF64LE,
  ELFT_ELF32BE,
  ELFT_ELF64BE
};
72 
// Read-only visitor over the concrete section classes. One pure-virtual
// visit() overload is declared per section type; each takes the section by
// const reference and returns an Error.
class SectionVisitor {
public:
  virtual ~SectionVisitor() = default;

  virtual Error visit(const Section &Sec) = 0;
  virtual Error visit(const OwnedDataSection &Sec) = 0;
  virtual Error visit(const StringTableSection &Sec) = 0;
  virtual Error visit(const SymbolTableSection &Sec) = 0;
  virtual Error visit(const RelocationSection &Sec) = 0;
  virtual Error visit(const DynamicRelocationSection &Sec) = 0;
  virtual Error visit(const GnuDebugLinkSection &Sec) = 0;
  virtual Error visit(const GroupSection &Sec) = 0;
  virtual Error visit(const SectionIndexSection &Sec) = 0;
  virtual Error visit(const CompressedSection &Sec) = 0;
  virtual Error visit(const DecompressedSection &Sec) = 0;
};
89 
// Mutating counterpart of the read-only section visitor: the same set of
// pure-virtual visit() overloads, but each receives the section by non-const
// reference.
class MutableSectionVisitor {
public:
  virtual ~MutableSectionVisitor() = default;

  virtual Error visit(Section &Sec) = 0;
  virtual Error visit(OwnedDataSection &Sec) = 0;
  virtual Error visit(StringTableSection &Sec) = 0;
  virtual Error visit(SymbolTableSection &Sec) = 0;
  virtual Error visit(RelocationSection &Sec) = 0;
  virtual Error visit(DynamicRelocationSection &Sec) = 0;
  virtual Error visit(GnuDebugLinkSection &Sec) = 0;
  virtual Error visit(GroupSection &Sec) = 0;
  virtual Error visit(SectionIndexSection &Sec) = 0;
  virtual Error visit(CompressedSection &Sec) = 0;
  virtual Error visit(DecompressedSection &Sec) = 0;
};
106 
107 class SectionWriter : public SectionVisitor {
108 protected:
109   Buffer &Out;
110 
111 public:
112   virtual ~SectionWriter() = default;
113 
114   Error visit(const Section &Sec) override;
115   Error visit(const OwnedDataSection &Sec) override;
116   Error visit(const StringTableSection &Sec) override;
117   Error visit(const DynamicRelocationSection &Sec) override;
118   virtual Error visit(const SymbolTableSection &Sec) override = 0;
119   virtual Error visit(const RelocationSection &Sec) override = 0;
120   virtual Error visit(const GnuDebugLinkSection &Sec) override = 0;
121   virtual Error visit(const GroupSection &Sec) override = 0;
122   virtual Error visit(const SectionIndexSection &Sec) override = 0;
123   virtual Error visit(const CompressedSection &Sec) override = 0;
124   virtual Error visit(const DecompressedSection &Sec) override = 0;
125 
SectionWriter(Buffer & Buf)126   explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
127 };
128 
129 template <class ELFT> class ELFSectionWriter : public SectionWriter {
130 private:
131   using Elf_Word = typename ELFT::Word;
132   using Elf_Rel = typename ELFT::Rel;
133   using Elf_Rela = typename ELFT::Rela;
134   using Elf_Sym = typename ELFT::Sym;
135 
136 public:
~ELFSectionWriter()137   virtual ~ELFSectionWriter() {}
138   Error visit(const SymbolTableSection &Sec) override;
139   Error visit(const RelocationSection &Sec) override;
140   Error visit(const GnuDebugLinkSection &Sec) override;
141   Error visit(const GroupSection &Sec) override;
142   Error visit(const SectionIndexSection &Sec) override;
143   Error visit(const CompressedSection &Sec) override;
144   Error visit(const DecompressedSection &Sec) override;
145 
ELFSectionWriter(Buffer & Buf)146   explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
147 };
148 
// Mutable section visitor parameterised on the ELF type ELFT. It declares an
// override for every section class; the definitions live outside this header.
// NOTE(review): the name suggests it computes section sizes for the given
// ELFT - confirm against the definitions.
template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
private:
  // ELFT-dependent record types.
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Word = typename ELFT::Word;
  using Elf_Xword = typename ELFT::Xword;

public:
  Error visit(Section &Sec) override;
  Error visit(OwnedDataSection &Sec) override;
  Error visit(StringTableSection &Sec) override;
  Error visit(DynamicRelocationSection &Sec) override;
  Error visit(SymbolTableSection &Sec) override;
  Error visit(RelocationSection &Sec) override;
  Error visit(GnuDebugLinkSection &Sec) override;
  Error visit(GroupSection &Sec) override;
  Error visit(SectionIndexSection &Sec) override;
  Error visit(CompressedSection &Sec) override;
  Error visit(DecompressedSection &Sec) override;
};
170 
// Expands, inside a section class body, to friend declarations for the section
// writer and sizer classes so that they can reach the section's private
// members. The expansion already ends with ';', so it is used without a
// trailing semicolon.
#define MAKE_SEC_WRITER_FRIEND                                                 \
  friend class SectionWriter;                                                  \
  friend class IHexSectionWriterBase;                                          \
  friend class IHexSectionWriter;                                              \
  template <class ELFT> friend class ELFSectionWriter;                         \
  template <class ELFT> friend class ELFSectionSizer;
177 
178 class BinarySectionWriter : public SectionWriter {
179 public:
~BinarySectionWriter()180   virtual ~BinarySectionWriter() {}
181 
182   Error visit(const SymbolTableSection &Sec) override;
183   Error visit(const RelocationSection &Sec) override;
184   Error visit(const GnuDebugLinkSection &Sec) override;
185   Error visit(const GroupSection &Sec) override;
186   Error visit(const SectionIndexSection &Sec) override;
187   Error visit(const CompressedSection &Sec) override;
188   Error visit(const DecompressedSection &Sec) override;
189 
BinarySectionWriter(Buffer & Buf)190   explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
191 };
192 
// Character buffer for one formatted IHEX line (the return type of
// IHexRecord::getLine). The SmallVector has inline capacity for 64 characters.
using IHexLineData = SmallVector<char, 64>;
194 
195 struct IHexRecord {
196   // Memory address of the record.
197   uint16_t Addr;
198   // Record type (see below).
199   uint16_t Type;
200   // Record data in hexadecimal form.
201   StringRef HexData;
202 
203   // Helper method to get file length of the record
204   // including newline character
getLengthIHexRecord205   static size_t getLength(size_t DataSize) {
206     // :LLAAAATT[DD...DD]CC'
207     return DataSize * 2 + 11;
208   }
209 
210   // Gets length of line in a file (getLength + CRLF).
getLineLengthIHexRecord211   static size_t getLineLength(size_t DataSize) {
212     return getLength(DataSize) + 2;
213   }
214 
215   // Given type, address and data returns line which can
216   // be written to output file.
217   static IHexLineData getLine(uint8_t Type, uint16_t Addr,
218                               ArrayRef<uint8_t> Data);
219 
220   // Parses the line and returns record if possible.
221   // Line should be trimmed from whitespace characters.
222   static Expected<IHexRecord> parse(StringRef Line);
223 
224   // Calculates checksum of stringified record representation
225   // S must NOT contain leading ':' and trailing whitespace
226   // characters
227   static uint8_t getChecksum(StringRef S);
228 
229   enum Type {
230     // Contains data and a 16-bit starting address for the data.
231     // The byte count specifies number of data bytes in the record.
232     Data = 0,
233     // Must occur exactly once per file in the last line of the file.
234     // The data field is empty (thus byte count is 00) and the address
235     // field is typically 0000.
236     EndOfFile = 1,
237     // The data field contains a 16-bit segment base address (thus byte
238     // count is always 02) compatible with 80x86 real mode addressing.
239     // The address field (typically 0000) is ignored. The segment address
240     // from the most recent 02 record is multiplied by 16 and added to each
241     // subsequent data record address to form the physical starting address
242     // for the data. This allows addressing up to one megabyte of address
243     // space.
244     SegmentAddr = 2,
245     // or 80x86 processors, specifies the initial content of the CS:IP
246     // registers. The address field is 0000, the byte count is always 04,
247     // the first two data bytes are the CS value, the latter two are the
248     // IP value.
249     StartAddr80x86 = 3,
250     // Allows for 32 bit addressing (up to 4GiB). The record's address field
251     // is ignored (typically 0000) and its byte count is always 02. The two
252     // data bytes (big endian) specify the upper 16 bits of the 32 bit
253     // absolute address for all subsequent type 00 records
254     ExtendedAddr = 4,
255     // The address field is 0000 (not used) and the byte count is always 04.
256     // The four data bytes represent a 32-bit address value. In the case of
257     // 80386 and higher CPUs, this address is loaded into the EIP register.
258     StartAddr = 5,
259     // We have no other valid types
260     InvalidType = 6
261   };
262 };
263 
// Base class for IHexSectionWriter. This class implements the writing
// algorithm, but doesn't actually write records. It is used for output buffer
// size calculation in IHexWriter::finalize.
class IHexSectionWriterBase : public BinarySectionWriter {
  // 20-bit segment address
  uint32_t SegmentAddr = 0;
  // Extended linear address
  uint32_t BaseAddr = 0;

  // Write segment address corresponding to 'Addr'
  uint64_t writeSegmentAddr(uint64_t Addr);
  // Write extended linear (base) address corresponding to 'Addr'
  uint64_t writeBaseAddr(uint64_t Addr);

protected:
  // Offset in the output buffer
  uint64_t Offset = 0;

  // Emits the given data for section Sec (defined out of line).
  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
  // Hook for a single record of the given type, address and data; virtual so
  // that IHexSectionWriter can override it.
  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);

public:
  explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}

  // Current value of Offset.
  uint64_t getBufferOffset() const { return Offset; }
  // These three overloads are final; only the StringTableSection overload may
  // be overridden further.
  Error visit(const Section &Sec) final;
  Error visit(const OwnedDataSection &Sec) final;
  Error visit(const StringTableSection &Sec) override;
  Error visit(const DynamicRelocationSection &Sec) final;
  // Keep the remaining BinarySectionWriter overloads visible; without this
  // the visit() declarations above would hide them.
  using BinarySectionWriter::visit;
};
295 
296 // Real IHEX section writer
297 class IHexSectionWriter : public IHexSectionWriterBase {
298 public:
IHexSectionWriter(Buffer & Buf)299   IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
300 
301   void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
302   Error visit(const StringTableSection &Sec) override;
303 };
304 
// Abstract base class of the output writers. It stores references to the
// Object to be written and to the destination Buffer; neither is owned.
class Writer {
protected:
  Object &Obj;
  Buffer &Buf;

public:
  // Defined out of line.
  virtual ~Writer();
  // Both steps return an Error and must be implemented by each concrete
  // writer.
  virtual Error finalize() = 0;
  virtual Error write() = 0;

  Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {}
};
317 
// Writer for ELF output, parameterised on the ELF type ELFT. All member
// functions declared here are defined out of line.
template <class ELFT> class ELFWriter : public Writer {
private:
  // ELFT-dependent header types.
  using Elf_Addr = typename ELFT::Addr;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Phdr = typename ELFT::Phdr;
  using Elf_Ehdr = typename ELFT::Ehdr;

  void initEhdrSegment();

  // Helpers that emit the individual headers.
  void writeEhdr();
  void writePhdr(const Segment &Seg);
  void writeShdr(const SectionBase &Sec);

  // Helpers that emit whole tables / data regions.
  void writePhdrs();
  void writeShdrs();
  Error writeSectionData();
  void writeSegmentData();

  void assignOffsets();

  // Section writer owned by this writer.
  std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;

  size_t totalSize() const;

public:
  virtual ~ELFWriter() {}
  // NOTE(review): presumably initialised from the constructor's WSH argument
  // - confirm in the constructor definition.
  bool WriteSectionHeaders;

  // For --only-keep-debug, select an alternative section/segment layout
  // algorithm.
  bool OnlyKeepDebug;

  Error finalize() override;
  Error write() override;
  ELFWriter(Object &Obj, Buffer &Buf, bool WSH, bool OnlyKeepDebug);
};
354 
355 class BinaryWriter : public Writer {
356 private:
357   std::unique_ptr<BinarySectionWriter> SecWriter;
358 
359   uint64_t TotalSize = 0;
360 
361 public:
~BinaryWriter()362   ~BinaryWriter() {}
363   Error finalize() override;
364   Error write() override;
BinaryWriter(Object & Obj,Buffer & Buf)365   BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
366 };
367 
368 class IHexWriter : public Writer {
369   struct SectionCompare {
370     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
371   };
372 
373   std::set<const SectionBase *, SectionCompare> Sections;
374   size_t TotalSize = 0;
375 
376   Error checkSection(const SectionBase &Sec);
377   uint64_t writeEntryPointRecord(uint8_t *Buf);
378   uint64_t writeEndOfFileRecord(uint8_t *Buf);
379 
380 public:
~IHexWriter()381   ~IHexWriter() {}
382   Error finalize() override;
383   Error write() override;
IHexWriter(Object & Obj,Buffer & Buf)384   IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
385 };
386 
// Common base class of all section types. It carries the section header
// fields as public data members and declares the virtual interface that
// concrete sections override.
class SectionBase {
public:
  std::string Name;
  // Segment this section is attached to, if any.
  Segment *ParentSegment = nullptr;
  uint64_t HeaderOffset = 0;
  uint32_t Index = 0;

  // NOTE(review): the Original* members presumably preserve the values the
  // section had when it was created/read, as opposed to the mutable copies
  // below - confirm against the reader. OriginalOffset defaults to the
  // maximum uint64_t value.
  uint32_t OriginalIndex = 0;
  uint64_t OriginalFlags = 0;
  uint64_t OriginalType = ELF::SHT_NULL;
  uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();

  // Section header fields.
  uint64_t Addr = 0;
  uint64_t Align = 1;
  uint32_t EntrySize = 0;
  uint64_t Flags = 0;
  uint64_t Info = 0;
  uint64_t Link = ELF::SHN_UNDEF;
  uint64_t NameIndex = 0;
  uint64_t Offset = 0;
  uint64_t Size = 0;
  uint64_t Type = ELF::SHT_NULL;
  // View of the section's original bytes (ArrayRef does not own the data).
  ArrayRef<uint8_t> OriginalData;
  bool HasSymbol = false;

  SectionBase() = default;
  SectionBase(const SectionBase &) = default;

  virtual ~SectionBase() = default;

  virtual Error initialize(SectionTableRef SecTable);
  virtual void finalize();
  // Remove references to these sections. The list of sections must be sorted.
  virtual Error
  removeSectionReferences(bool AllowBrokenLinks,
                          function_ref<bool(const SectionBase *)> ToRemove);
  // Remove the symbols for which ToRemove returns true.
  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
  // Visitor entry points; every concrete section must implement both.
  virtual Error accept(SectionVisitor &Visitor) const = 0;
  virtual Error accept(MutableSectionVisitor &Visitor) = 0;
  virtual void markSymbols();
  virtual void
  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
  // Notify the section that it is subject to removal.
  virtual void onRemove();
};
432 
433 class Segment {
434 private:
435   struct SectionCompare {
operatorSectionCompare436     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
437       // Some sections might have the same address if one of them is empty. To
438       // fix this we can use the lexicographic ordering on ->Addr and the
439       // original index.
440       if (Lhs->OriginalOffset == Rhs->OriginalOffset)
441         return Lhs->OriginalIndex < Rhs->OriginalIndex;
442       return Lhs->OriginalOffset < Rhs->OriginalOffset;
443     }
444   };
445 
446 public:
447   uint32_t Type = 0;
448   uint32_t Flags = 0;
449   uint64_t Offset = 0;
450   uint64_t VAddr = 0;
451   uint64_t PAddr = 0;
452   uint64_t FileSize = 0;
453   uint64_t MemSize = 0;
454   uint64_t Align = 0;
455 
456   uint32_t Index = 0;
457   uint64_t OriginalOffset = 0;
458   Segment *ParentSegment = nullptr;
459   ArrayRef<uint8_t> Contents;
460   std::set<const SectionBase *, SectionCompare> Sections;
461 
Segment(ArrayRef<uint8_t> Data)462   explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
463   Segment() = default;
464 
firstSection()465   const SectionBase *firstSection() const {
466     if (!Sections.empty())
467       return *Sections.begin();
468     return nullptr;
469   }
470 
removeSection(const SectionBase * Sec)471   void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
addSection(const SectionBase * Sec)472   void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
473 
getContents()474   ArrayRef<uint8_t> getContents() const { return Contents; }
475 };
476 
// Generic section whose contents are a view (ArrayRef, not owned) of the bytes
// supplied at construction, with an optional linked section.
class Section : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

  ArrayRef<uint8_t> Contents;
  // Null until set; NOTE(review): presumably resolved from Link in
  // initialize() - confirm in the definition.
  SectionBase *LinkSection = nullptr;

public:
  explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}

  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
  Error removeSectionReferences(
      bool AllowBrokenLinks,
      function_ref<bool(const SectionBase *)> ToRemove) override;
  Error initialize(SectionTableRef SecTable) override;
  void finalize() override;
};
494 
495 class OwnedDataSection : public SectionBase {
496   MAKE_SEC_WRITER_FRIEND
497 
498   std::vector<uint8_t> Data;
499 
500 public:
OwnedDataSection(StringRef SecName,ArrayRef<uint8_t> Data)501   OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
502       : Data(std::begin(Data), std::end(Data)) {
503     Name = SecName.str();
504     Type = OriginalType = ELF::SHT_PROGBITS;
505     Size = Data.size();
506     OriginalOffset = std::numeric_limits<uint64_t>::max();
507   }
508 
OwnedDataSection(const Twine & SecName,uint64_t SecAddr,uint64_t SecFlags,uint64_t SecOff)509   OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
510                    uint64_t SecOff) {
511     Name = SecName.str();
512     Type = OriginalType = ELF::SHT_PROGBITS;
513     Addr = SecAddr;
514     Flags = OriginalFlags = SecFlags;
515     OriginalOffset = SecOff;
516   }
517 
518   void appendHexData(StringRef HexData);
519   Error accept(SectionVisitor &Sec) const override;
520   Error accept(MutableSectionVisitor &Visitor) override;
521 };
522 
523 class CompressedSection : public SectionBase {
524   MAKE_SEC_WRITER_FRIEND
525 
526   DebugCompressionType CompressionType;
527   uint64_t DecompressedSize;
528   uint64_t DecompressedAlign;
529   SmallVector<char, 128> CompressedData;
530 
531 public:
532   static Expected<CompressedSection>
533   create(const SectionBase &Sec, DebugCompressionType CompressionType);
534   static Expected<CompressedSection> create(ArrayRef<uint8_t> CompressedData,
535                                             uint64_t DecompressedSize,
536                                             uint64_t DecompressedAlign);
537 
getDecompressedSize()538   uint64_t getDecompressedSize() const { return DecompressedSize; }
getDecompressedAlign()539   uint64_t getDecompressedAlign() const { return DecompressedAlign; }
540 
541   Error accept(SectionVisitor &Visitor) const override;
542   Error accept(MutableSectionVisitor &Visitor) override;
543 
classof(const SectionBase * S)544   static bool classof(const SectionBase *S) {
545     return (S->OriginalFlags & ELF::SHF_COMPRESSED) ||
546            (StringRef(S->Name).startswith(".zdebug"));
547   }
548 
549 private:
550   CompressedSection(const SectionBase &Sec,
551                     DebugCompressionType CompressionType, Error &Err);
552   CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
553                     uint64_t DecompressedAlign);
554 };
555 
556 class DecompressedSection : public SectionBase {
557   MAKE_SEC_WRITER_FRIEND
558 
559 public:
DecompressedSection(const CompressedSection & Sec)560   explicit DecompressedSection(const CompressedSection &Sec)
561       : SectionBase(Sec) {
562     Size = Sec.getDecompressedSize();
563     Align = Sec.getDecompressedAlign();
564     Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
565     if (StringRef(Name).startswith(".zdebug"))
566       Name = "." + Name.substr(2);
567   }
568 
569   Error accept(SectionVisitor &Visitor) const override;
570   Error accept(MutableSectionVisitor &Visitor) override;
571 };
572 
573 // There are two types of string tables that can exist, dynamic and not dynamic.
574 // In the dynamic case the string table is allocated. Changing a dynamic string
575 // table would mean altering virtual addresses and thus the memory image. So
576 // dynamic string tables should not have an interface to modify them or
577 // reconstruct them. This type lets us reconstruct a string table. To avoid
578 // this class being used for dynamic string tables (which has happened) the
579 // classof method checks that the particular instance is not allocated. This
580 // then agrees with the makeSection method used to construct most sections.
581 class StringTableSection : public SectionBase {
582   MAKE_SEC_WRITER_FRIEND
583 
584   StringTableBuilder StrTabBuilder;
585 
586 public:
StringTableSection()587   StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
588     Type = OriginalType = ELF::SHT_STRTAB;
589   }
590 
591   void addString(StringRef Name);
592   uint32_t findIndex(StringRef Name) const;
593   void prepareForLayout();
594   Error accept(SectionVisitor &Visitor) const override;
595   Error accept(MutableSectionVisitor &Visitor) override;
596 
classof(const SectionBase * S)597   static bool classof(const SectionBase *S) {
598     if (S->OriginalFlags & ELF::SHF_ALLOC)
599       return false;
600     return S->OriginalType == ELF::SHT_STRTAB;
601   }
602 };
603 
// Symbols have a st_shndx field that normally stores an index but occasionally
// stores a different special value. This enum keeps track of what the st_shndx
// field means. Most of the values are just copies of the special SHN_* values.
// SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section.
enum SymbolShndxType {
  // The only enumerator that is not a copy of an SHN_* constant.
  SYMBOL_SIMPLE_INDEX = 0,
  SYMBOL_ABS = ELF::SHN_ABS,
  SYMBOL_COMMON = ELF::SHN_COMMON,
  SYMBOL_LOPROC = ELF::SHN_LOPROC,
  SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
  SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
  SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
  SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
  SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
  SYMBOL_HIPROC = ELF::SHN_HIPROC,
  SYMBOL_LOOS = ELF::SHN_LOOS,
  SYMBOL_HIOS = ELF::SHN_HIOS,
  SYMBOL_XINDEX = ELF::SHN_XINDEX,
};
623 
624 struct Symbol {
625   uint8_t Binding;
626   SectionBase *DefinedIn = nullptr;
627   SymbolShndxType ShndxType;
628   uint32_t Index;
629   std::string Name;
630   uint32_t NameIndex;
631   uint64_t Size;
632   uint8_t Type;
633   uint64_t Value;
634   uint8_t Visibility;
635   bool Referenced = false;
636 
637   uint16_t getShndx() const;
638   bool isCommon() const;
639 };
640 
641 class SectionIndexSection : public SectionBase {
642   MAKE_SEC_WRITER_FRIEND
643 
644 private:
645   std::vector<uint32_t> Indexes;
646   SymbolTableSection *Symbols = nullptr;
647 
648 public:
~SectionIndexSection()649   virtual ~SectionIndexSection() {}
addIndex(uint32_t Index)650   void addIndex(uint32_t Index) {
651     assert(Size > 0);
652     Indexes.push_back(Index);
653   }
654 
reserve(size_t NumSymbols)655   void reserve(size_t NumSymbols) {
656     Indexes.reserve(NumSymbols);
657     Size = NumSymbols * 4;
658   }
setSymTab(SymbolTableSection * SymTab)659   void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
660   Error initialize(SectionTableRef SecTable) override;
661   void finalize() override;
662   Error accept(SectionVisitor &Visitor) const override;
663   Error accept(MutableSectionVisitor &Visitor) override;
664 
SectionIndexSection()665   SectionIndexSection() {
666     Name = ".symtab_shndx";
667     Align = 4;
668     EntrySize = 4;
669     Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
670   }
671 };
672 
// Section of type SHT_SYMTAB. It owns its Symbol entries and refers to the
// string table holding the symbol names and, optionally, to a section index
// table.
class SymbolTableSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

  // Private helpers.
  void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
  void assignIndices();

protected:
  std::vector<std::unique_ptr<Symbol>> Symbols;
  StringTableSection *SymbolNames = nullptr;
  SectionIndexSection *SectionIndexTable = nullptr;

  using SymPtr = std::unique_ptr<Symbol>;

public:
  SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }

  // Adds a symbol built from the given attributes (defined out of line).
  void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
                 uint64_t Value, uint8_t Visibility, uint16_t Shndx,
                 uint64_t SymbolSize);
  void prepareForLayout();
  // An 'empty' symbol table still contains a null symbol.
  bool empty() const { return Symbols.size() == 1; }
  // Sets the associated section index table.
  void setShndxTable(SectionIndexSection *ShndxTable) {
    SectionIndexTable = ShndxTable;
  }
  // Returns the associated section index table, or null if none was set.
  const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
  void fillShndxTable();
  // Returns the string table holding the symbol names, or null if none was
  // set.
  const SectionBase *getStrTab() const { return SymbolNames; }
  Expected<const Symbol *> getSymbolByIndex(uint32_t Index) const;
  Expected<Symbol *> getSymbolByIndex(uint32_t Index);
  // Takes a callable that receives a symbol by mutable reference (defined out
  // of line).
  void updateSymbols(function_ref<void(Symbol &)> Callable);

  Error removeSectionReferences(
      bool AllowBrokenLinks,
      function_ref<bool(const SectionBase *)> ToRemove) override;
  Error initialize(SectionTableRef SecTable) override;
  void finalize() override;
  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
  void replaceSectionReferences(
      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;

  // True for sections whose original type is SHT_SYMTAB.
  static bool classof(const SectionBase *S) {
    return S->OriginalType == ELF::SHT_SYMTAB;
  }
};
720 
721 struct Relocation {
722   Symbol *RelocSymbol = nullptr;
723   uint64_t Offset;
724   uint64_t Addend;
725   uint32_t Type;
726 };
727 
728 // All relocation sections denote relocations to apply to another section.
729 // However, some relocation sections use a dynamic symbol table and others use
730 // a regular symbol table. Because the types of the two symbol tables differ in
731 // our system (because they should behave differently) we can't uniformly
732 // represent all relocations with the same base class if we expose an interface
733 // that mentions the symbol table type. So we split the two base types into two
734 // different classes, one which handles the section the relocation is applied to
735 // and another which handles the symbol table type. The symbol table type is
736 // taken as a type parameter to the class (see RelocSectionWithSymtabBase).
737 class RelocationSectionBase : public SectionBase {
738 protected:
739   SectionBase *SecToApplyRel = nullptr;
740 
741 public:
getSection()742   const SectionBase *getSection() const { return SecToApplyRel; }
setSection(SectionBase * Sec)743   void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
744 
classof(const SectionBase * S)745   static bool classof(const SectionBase *S) {
746     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
747   }
748 };
749 
// Takes the symbol table type to use as a parameter so that we can deduplicate
// that code between the two symbol table types.
template <class SymTabType>
class RelocSectionWithSymtabBase : public RelocationSectionBase {
  // Private setter for the associated symbol table.
  void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }

protected:
  // Protected so that only derived classes can construct this type.
  RelocSectionWithSymtabBase() = default;

  // Associated symbol table; null until set.
  SymTabType *Symbols = nullptr;

public:
  Error initialize(SectionTableRef SecTable) override;
  void finalize() override;
};
765 
766 class RelocationSection
767     : public RelocSectionWithSymtabBase<SymbolTableSection> {
768   MAKE_SEC_WRITER_FRIEND
769 
770   std::vector<Relocation> Relocations;
771 
772 public:
addRelocation(Relocation Rel)773   void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
774   Error accept(SectionVisitor &Visitor) const override;
775   Error accept(MutableSectionVisitor &Visitor) override;
776   Error removeSectionReferences(
777       bool AllowBrokenLinks,
778       function_ref<bool(const SectionBase *)> ToRemove) override;
779   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
780   void markSymbols() override;
781   void replaceSectionReferences(
782       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
783 
classof(const SectionBase * S)784   static bool classof(const SectionBase *S) {
785     if (S->OriginalFlags & ELF::SHF_ALLOC)
786       return false;
787     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
788   }
789 };
790 
791 // TODO: The way stripping and groups interact is complicated
792 // and still needs to be worked on.
793 
// Section of type SHT_GROUP: a flag word, a symbol, and a list of member
// sections.
class GroupSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND
  const SymbolTableSection *SymTab = nullptr;
  Symbol *Sym = nullptr;
  // No default initializer: holds a defined value only after setFlagWord().
  ELF::Elf32_Word FlagWord;
  SmallVector<SectionBase *, 3> GroupMembers;

public:
  // TODO: Contents is present in several classes of the hierarchy.
  // This needs to be refactored to avoid duplication.
  ArrayRef<uint8_t> Contents;

  explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}

  // Simple setters for the private state above.
  void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
  void setSymbol(Symbol *S) { Sym = S; }
  void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
  // Appends Sec to the list of group members.
  void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }

  Error accept(SectionVisitor &) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
  void finalize() override;
  Error removeSectionReferences(
      bool AllowBrokenLinks,
      function_ref<bool(const SectionBase *)> ToRemove) override;
  Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
  void markSymbols() override;
  void replaceSectionReferences(
      const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
  void onRemove() override;

  // True for sections whose original type is SHT_GROUP.
  static bool classof(const SectionBase *S) {
    return S->OriginalType == ELF::SHT_GROUP;
  }
};
829 
830 class DynamicSymbolTableSection : public Section {
831 public:
DynamicSymbolTableSection(ArrayRef<uint8_t> Data)832   explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
833 
classof(const SectionBase * S)834   static bool classof(const SectionBase *S) {
835     return S->OriginalType == ELF::SHT_DYNSYM;
836   }
837 };
838 
839 class DynamicSection : public Section {
840 public:
DynamicSection(ArrayRef<uint8_t> Data)841   explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
842 
classof(const SectionBase * S)843   static bool classof(const SectionBase *S) {
844     return S->OriginalType == ELF::SHT_DYNAMIC;
845   }
846 };
847 
848 class DynamicRelocationSection
849     : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
850   MAKE_SEC_WRITER_FRIEND
851 
852 private:
853   ArrayRef<uint8_t> Contents;
854 
855 public:
DynamicRelocationSection(ArrayRef<uint8_t> Data)856   explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
857 
858   Error accept(SectionVisitor &) const override;
859   Error accept(MutableSectionVisitor &Visitor) override;
860   Error removeSectionReferences(
861       bool AllowBrokenLinks,
862       function_ref<bool(const SectionBase *)> ToRemove) override;
863 
classof(const SectionBase * S)864   static bool classof(const SectionBase *S) {
865     if (!(S->OriginalFlags & ELF::SHF_ALLOC))
866       return false;
867     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
868   }
869 };
870 
// Section described by a file name and a CRC32 value.
// NOTE(review): presumably models the `.gnu_debuglink` section — confirm
// against the out-of-line init() and constructor.
class GnuDebugLinkSection : public SectionBase {
  MAKE_SEC_WRITER_FRIEND

private:
  // File name associated with this section. StringRef does not own its
  // characters, so the backing string must outlive this object.
  StringRef FileName;
  // Checksum associated with the file.
  uint32_t CRC32;

  // Out-of-line helper that takes the file name.
  void init(StringRef File);

public:
  // If we add this section from an external source we can use this ctor.
  explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
  // Visitor entry points: read-only and mutating variants.
  Error accept(SectionVisitor &Visitor) const override;
  Error accept(MutableSectionVisitor &Visitor) override;
};
886 
// Abstract base for the input readers declared in this file (BinaryReader,
// IHexReader and ELFReader derive from it).
class Reader {
public:
  // Virtual so that readers can be destroyed through a Reader pointer;
  // defined out of line.
  virtual ~Reader();
  // Produces a new Object from the reader's input, or an Error on failure.
  // NOTE(review): EnsureSymtab presumably requests that the result always
  // has a symbol table — confirm against the implementations.
  virtual Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const = 0;
};
892 
// Make these llvm::object types nameable without qualification inside
// llvm::objcopy::elf.
using object::Binary;
using object::ELFFile;
using object::ELFObjectFile;
using object::OwningBinary;
897 
// Common base for builders that assemble a fresh Object (BinaryELFBuilder and
// IHexELFBuilder derive from it). It owns the Object under construction and
// declares the shared initialization steps, which are defined out of line.
class BasicELFBuilder {
protected:
  // The object being built; allocated by the constructor.
  std::unique_ptr<Object> Obj;

  // Shared set-up helpers available to derived builders.
  void initFileHeader();
  void initHeaderSegment();
  StringTableSection *addStrTab();
  SymbolTableSection *addSymTab(StringTableSection *StrTab);
  Error initSections();

public:
  // Starts with an empty, default-constructed Object.
  BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
};
911 
912 class BinaryELFBuilder : public BasicELFBuilder {
913   MemoryBuffer *MemBuf;
914   uint8_t NewSymbolVisibility;
915   void addData(SymbolTableSection *SymTab);
916 
917 public:
BinaryELFBuilder(MemoryBuffer * MB,uint8_t NewSymbolVisibility)918   BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility)
919       : BasicELFBuilder(), MemBuf(MB),
920         NewSymbolVisibility(NewSymbolVisibility) {}
921 
922   Expected<std::unique_ptr<Object>> build();
923 };
924 
925 class IHexELFBuilder : public BasicELFBuilder {
926   const std::vector<IHexRecord> &Records;
927 
928   void addDataSections();
929 
930 public:
IHexELFBuilder(const std::vector<IHexRecord> & Records)931   IHexELFBuilder(const std::vector<IHexRecord> &Records)
932       : BasicELFBuilder(), Records(Records) {}
933 
934   Expected<std::unique_ptr<Object>> build();
935 };
936 
// Populates an existing Object from an ELF file of the given ELFT flavour.
// All member functions are defined out of line.
template <class ELFT> class ELFBuilder {
private:
  // Shorthands for the ELFT-specific address, section header and word types.
  using Elf_Addr = typename ELFT::Addr;
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Word = typename ELFT::Word;

  // Input file; held by reference, so it must outlive the builder.
  const ELFFile<ELFT> &ElfFile;
  // Object being filled in; held by reference and owned by the caller.
  Object &Obj;
  // Offset of the ELF header; starts at 0.
  // NOTE(review): presumably updated by findEhdrOffset() — confirm.
  size_t EhdrOffset = 0;
  // Optional partition name supplied by the caller.
  Optional<StringRef> ExtractPartition;

  // Internal build steps.
  void setParentSegment(Segment &Child);
  Error readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
  Error initGroupSection(GroupSection *GroupSec);
  Error initSymbolTable(SymbolTableSection *SymTab);
  Error readSectionHeaders();
  Error readSections(bool EnsureSymtab);
  Error findEhdrOffset();
  Expected<SectionBase &> makeSection(const Elf_Shdr &Shdr);

public:
  // Binds the builder to the ELFFile inside ElfObj and to the Object that
  // will receive the result.
  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
             Optional<StringRef> ExtractPartition)
      : ElfFile(ElfObj.getELFFile()), Obj(Obj),
        ExtractPartition(ExtractPartition) {}

  // Entry point; returns an Error on failure.
  Error build(bool EnsureSymtab);
};
965 
// Reader whose input is a MemoryBuffer, paired with a symbol visibility value
// supplied by the caller.
class BinaryReader : public Reader {
  // Input buffer; held as a raw pointer and not owned by the reader.
  MemoryBuffer *MemBuf;
  // Visibility value passed in at construction.
  uint8_t NewSymbolVisibility;

public:
  BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
      : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
  // Reader override; defined out of line.
  Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
};
975 
976 class IHexReader : public Reader {
977   MemoryBuffer *MemBuf;
978 
979   Expected<std::vector<IHexRecord>> parse() const;
parseError(size_t LineNo,Error E)980   Error parseError(size_t LineNo, Error E) const {
981     return LineNo == -1U
982                ? createFileError(MemBuf->getBufferIdentifier(), std::move(E))
983                : createFileError(MemBuf->getBufferIdentifier(), LineNo,
984                                  std::move(E));
985   }
986   template <typename... Ts>
parseError(size_t LineNo,char const * Fmt,const Ts &...Vals)987   Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const {
988     Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
989     return parseError(LineNo, std::move(E));
990   }
991 
992 public:
IHexReader(MemoryBuffer * MB)993   IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
994 
995   Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
996 };
997 
998 class ELFReader : public Reader {
999   Binary *Bin;
1000   Optional<StringRef> ExtractPartition;
1001 
1002 public:
1003   Expected<std::unique_ptr<Object>> create(bool EnsureSymtab) const override;
ELFReader(Binary * B,Optional<StringRef> ExtractPartition)1004   explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
1005       : Bin(B), ExtractPartition(ExtractPartition) {}
1006 };
1007 
1008 class Object {
1009 private:
1010   using SecPtr = std::unique_ptr<SectionBase>;
1011   using SegPtr = std::unique_ptr<Segment>;
1012 
1013   std::vector<SecPtr> Sections;
1014   std::vector<SegPtr> Segments;
1015   std::vector<SecPtr> RemovedSections;
1016 
sectionIsAlloc(const SectionBase & Sec)1017   static bool sectionIsAlloc(const SectionBase &Sec) {
1018     return Sec.Flags & ELF::SHF_ALLOC;
1019   };
1020 
1021 public:
1022   template <class T>
1023   using Range = iterator_range<
1024       pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>;
1025 
1026   template <class T>
1027   using ConstRange = iterator_range<pointee_iterator<
1028       typename std::vector<std::unique_ptr<T>>::const_iterator>>;
1029 
1030   // It is often the case that the ELF header and the program header table are
1031   // not present in any segment. This could be a problem during file layout,
1032   // because other segments may get assigned an offset where either of the
1033   // two should reside, which will effectively corrupt the resulting binary.
1034   // Other than that we use these segments to track program header offsets
1035   // when they may not follow the ELF header.
1036   Segment ElfHdrSegment;
1037   Segment ProgramHdrSegment;
1038 
1039   uint8_t OSABI;
1040   uint8_t ABIVersion;
1041   uint64_t Entry;
1042   uint64_t SHOff;
1043   uint32_t Type;
1044   uint32_t Machine;
1045   uint32_t Version;
1046   uint32_t Flags;
1047 
1048   bool HadShdrs = true;
1049   bool MustBeRelocatable = false;
1050   StringTableSection *SectionNames = nullptr;
1051   SymbolTableSection *SymbolTable = nullptr;
1052   SectionIndexSection *SectionIndexTable = nullptr;
1053 
1054   void sortSections();
sections()1055   SectionTableRef sections() { return SectionTableRef(Sections); }
sections()1056   ConstRange<SectionBase> sections() const {
1057     return make_pointee_range(Sections);
1058   }
1059   iterator_range<
1060       filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
1061                       decltype(&sectionIsAlloc)>>
allocSections()1062   allocSections() const {
1063     return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
1064   }
1065 
findSection(StringRef Name)1066   SectionBase *findSection(StringRef Name) {
1067     auto SecIt =
1068         find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
1069     return SecIt == Sections.end() ? nullptr : SecIt->get();
1070   }
removedSections()1071   SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
1072 
segments()1073   Range<Segment> segments() { return make_pointee_range(Segments); }
segments()1074   ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
1075 
1076   Error removeSections(bool AllowBrokenLinks,
1077                        std::function<bool(const SectionBase &)> ToRemove);
1078   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
addSection(Ts &&...Args)1079   template <class T, class... Ts> T &addSection(Ts &&... Args) {
1080     auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
1081     auto Ptr = Sec.get();
1082     MustBeRelocatable |= isa<RelocationSection>(*Ptr);
1083     Sections.emplace_back(std::move(Sec));
1084     Ptr->Index = Sections.size();
1085     return *Ptr;
1086   }
1087   Error addNewSymbolTable();
addSegment(ArrayRef<uint8_t> Data)1088   Segment &addSegment(ArrayRef<uint8_t> Data) {
1089     Segments.emplace_back(std::make_unique<Segment>(Data));
1090     return *Segments.back();
1091   }
isRelocatable()1092   bool isRelocatable() const {
1093     return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
1094   }
1095 };
1096 
1097 } // end namespace elf
1098 } // end namespace objcopy
1099 } // end namespace llvm
1100 
1101 #endif // LLVM_TOOLS_OBJCOPY_OBJECT_H
1102