1 //===- Object.h -------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
10 #define LLVM_TOOLS_OBJCOPY_OBJECT_H
11 
12 #include "Buffer.h"
13 #include "CopyConfig.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Twine.h"
17 #include "llvm/BinaryFormat/ELF.h"
18 #include "llvm/MC/StringTableBuilder.h"
19 #include "llvm/Object/ELFObjectFile.h"
20 #include "llvm/Support/Errc.h"
21 #include "llvm/Support/FileOutputBuffer.h"
22 #include <cstddef>
23 #include <cstdint>
24 #include <functional>
25 #include <memory>
26 #include <set>
27 #include <vector>
28 
29 namespace llvm {
30 enum class DebugCompressionType;
31 namespace objcopy {
32 namespace elf {
33 
// Forward declarations. The section-table, visitor and writer interfaces
// below mention these types before their definitions appear.
struct Symbol;
class Object;
class Segment;

class SectionBase;
class Section;
class OwnedDataSection;
class CompressedSection;
class DecompressedSection;
class StringTableSection;
class SymbolTableSection;
class SectionIndexSection;
class RelocationSection;
class DynamicRelocationSection;
class GroupSection;
class GnuDebugLinkSection;
49 
50 class SectionTableRef {
51   MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
52 
53 public:
54   using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
55 
SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)56   explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)
57       : Sections(Secs) {}
58   SectionTableRef(const SectionTableRef &) = default;
59 
begin()60   iterator begin() const { return iterator(Sections.data()); }
end()61   iterator end() const { return iterator(Sections.data() + Sections.size()); }
size()62   size_t size() const { return Sections.size(); }
63 
64   SectionBase *getSection(uint32_t Index, Twine ErrMsg);
65 
66   template <class T>
67   T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg);
68 };
69 
// Selects one of the four ELF flavours by name (32/64 bit, LE/BE suffix).
// The enumerators keep their implicit values 0..3.
enum ElfType {
  ELFT_ELF32LE,
  ELFT_ELF64LE,
  ELFT_ELF32BE,
  ELFT_ELF64BE
};
71 
// Visitor interface over the concrete section classes, taking each section
// by const reference. Every overload is pure virtual, so an implementation
// has to handle every section kind listed here.
class SectionVisitor {
public:
  virtual ~SectionVisitor() = default;

  virtual void visit(const Section &Sec) = 0;
  virtual void visit(const OwnedDataSection &Sec) = 0;
  virtual void visit(const StringTableSection &Sec) = 0;
  virtual void visit(const SymbolTableSection &Sec) = 0;
  virtual void visit(const RelocationSection &Sec) = 0;
  virtual void visit(const DynamicRelocationSection &Sec) = 0;
  virtual void visit(const GnuDebugLinkSection &Sec) = 0;
  virtual void visit(const GroupSection &Sec) = 0;
  virtual void visit(const SectionIndexSection &Sec) = 0;
  virtual void visit(const CompressedSection &Sec) = 0;
  virtual void visit(const DecompressedSection &Sec) = 0;
};
88 
// Counterpart of SectionVisitor whose overloads receive the section by
// non-const reference, so an implementation may modify the visited section.
// Every overload is pure virtual.
class MutableSectionVisitor {
public:
  virtual ~MutableSectionVisitor() = default;

  virtual void visit(Section &Sec) = 0;
  virtual void visit(OwnedDataSection &Sec) = 0;
  virtual void visit(StringTableSection &Sec) = 0;
  virtual void visit(SymbolTableSection &Sec) = 0;
  virtual void visit(RelocationSection &Sec) = 0;
  virtual void visit(DynamicRelocationSection &Sec) = 0;
  virtual void visit(GnuDebugLinkSection &Sec) = 0;
  virtual void visit(GroupSection &Sec) = 0;
  virtual void visit(SectionIndexSection &Sec) = 0;
  virtual void visit(CompressedSection &Sec) = 0;
  virtual void visit(DecompressedSection &Sec) = 0;
};
105 
106 class SectionWriter : public SectionVisitor {
107 protected:
108   Buffer &Out;
109 
110 public:
111   virtual ~SectionWriter() = default;
112 
113   void visit(const Section &Sec) override;
114   void visit(const OwnedDataSection &Sec) override;
115   void visit(const StringTableSection &Sec) override;
116   void visit(const DynamicRelocationSection &Sec) override;
117   virtual void visit(const SymbolTableSection &Sec) override = 0;
118   virtual void visit(const RelocationSection &Sec) override = 0;
119   virtual void visit(const GnuDebugLinkSection &Sec) override = 0;
120   virtual void visit(const GroupSection &Sec) override = 0;
121   virtual void visit(const SectionIndexSection &Sec) override = 0;
122   virtual void visit(const CompressedSection &Sec) override = 0;
123   virtual void visit(const DecompressedSection &Sec) override = 0;
124 
SectionWriter(Buffer & Buf)125   explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
126 };
127 
128 template <class ELFT> class ELFSectionWriter : public SectionWriter {
129 private:
130   using Elf_Word = typename ELFT::Word;
131   using Elf_Rel = typename ELFT::Rel;
132   using Elf_Rela = typename ELFT::Rela;
133   using Elf_Sym = typename ELFT::Sym;
134 
135 public:
~ELFSectionWriter()136   virtual ~ELFSectionWriter() {}
137   void visit(const SymbolTableSection &Sec) override;
138   void visit(const RelocationSection &Sec) override;
139   void visit(const GnuDebugLinkSection &Sec) override;
140   void visit(const GroupSection &Sec) override;
141   void visit(const SectionIndexSection &Sec) override;
142   void visit(const CompressedSection &Sec) override;
143   void visit(const DecompressedSection &Sec) override;
144 
ELFSectionWriter(Buffer & Buf)145   explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
146 };
147 
// Mutable visitor parameterised on the ELF type ELFT, with an overload for
// every section kind. The aliases name the ELFT-specific record types.
// NOTE(review): judging by the name, this sets size-related fields of the
// visited sections -- confirm against the out-of-line definitions.
template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
private:
  using Elf_Rel = typename ELFT::Rel;
  using Elf_Rela = typename ELFT::Rela;
  using Elf_Sym = typename ELFT::Sym;
  using Elf_Word = typename ELFT::Word;
  using Elf_Xword = typename ELFT::Xword;

public:
  void visit(Section &Sec) override;
  void visit(OwnedDataSection &Sec) override;
  void visit(StringTableSection &Sec) override;
  void visit(DynamicRelocationSection &Sec) override;
  void visit(SymbolTableSection &Sec) override;
  void visit(RelocationSection &Sec) override;
  void visit(GnuDebugLinkSection &Sec) override;
  void visit(GroupSection &Sec) override;
  void visit(SectionIndexSection &Sec) override;
  void visit(CompressedSection &Sec) override;
  void visit(DecompressedSection &Sec) override;
};
169 
// Expands to friend declarations for the section writers and the section
// sizer, giving them access to the private members of the class in which the
// macro is used. The expansion already ends with a semicolon, so uses of the
// macro are not followed by one.
#define MAKE_SEC_WRITER_FRIEND                                                 \
  friend class SectionWriter;                                                  \
  friend class IHexSectionWriterBase;                                          \
  friend class IHexSectionWriter;                                              \
  template <class ELFT> friend class ELFSectionWriter;                         \
  template <class ELFT> friend class ELFSectionSizer;
176 
177 class BinarySectionWriter : public SectionWriter {
178 public:
~BinarySectionWriter()179   virtual ~BinarySectionWriter() {}
180 
181   void visit(const SymbolTableSection &Sec) override;
182   void visit(const RelocationSection &Sec) override;
183   void visit(const GnuDebugLinkSection &Sec) override;
184   void visit(const GroupSection &Sec) override;
185   void visit(const SectionIndexSection &Sec) override;
186   void visit(const CompressedSection &Sec) override;
187   void visit(const DecompressedSection &Sec) override;
188 
BinarySectionWriter(Buffer & Buf)189   explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
190 };
191 
// Character buffer for one IHEX record line, as returned by
// IHexRecord::getLine.
using IHexLineData = SmallVector<char, 64>;
193 
194 struct IHexRecord {
195   // Memory address of the record.
196   uint16_t Addr;
197   // Record type (see below).
198   uint16_t Type;
199   // Record data in hexadecimal form.
200   StringRef HexData;
201 
202   // Helper method to get file length of the record
203   // including newline character
getLengthIHexRecord204   static size_t getLength(size_t DataSize) {
205     // :LLAAAATT[DD...DD]CC'
206     return DataSize * 2 + 11;
207   }
208 
209   // Gets length of line in a file (getLength + CRLF).
getLineLengthIHexRecord210   static size_t getLineLength(size_t DataSize) {
211     return getLength(DataSize) + 2;
212   }
213 
214   // Given type, address and data returns line which can
215   // be written to output file.
216   static IHexLineData getLine(uint8_t Type, uint16_t Addr,
217                               ArrayRef<uint8_t> Data);
218 
219   // Parses the line and returns record if possible.
220   // Line should be trimmed from whitespace characters.
221   static Expected<IHexRecord> parse(StringRef Line);
222 
223   // Calculates checksum of stringified record representation
224   // S must NOT contain leading ':' and trailing whitespace
225   // characters
226   static uint8_t getChecksum(StringRef S);
227 
228   enum Type {
229     // Contains data and a 16-bit starting address for the data.
230     // The byte count specifies number of data bytes in the record.
231     Data = 0,
232     // Must occur exactly once per file in the last line of the file.
233     // The data field is empty (thus byte count is 00) and the address
234     // field is typically 0000.
235     EndOfFile = 1,
236     // The data field contains a 16-bit segment base address (thus byte
237     // count is always 02) compatible with 80x86 real mode addressing.
238     // The address field (typically 0000) is ignored. The segment address
239     // from the most recent 02 record is multiplied by 16 and added to each
240     // subsequent data record address to form the physical starting address
241     // for the data. This allows addressing up to one megabyte of address
242     // space.
243     SegmentAddr = 2,
244     // or 80x86 processors, specifies the initial content of the CS:IP
245     // registers. The address field is 0000, the byte count is always 04,
246     // the first two data bytes are the CS value, the latter two are the
247     // IP value.
248     StartAddr80x86 = 3,
249     // Allows for 32 bit addressing (up to 4GiB). The record's address field
250     // is ignored (typically 0000) and its byte count is always 02. The two
251     // data bytes (big endian) specify the upper 16 bits of the 32 bit
252     // absolute address for all subsequent type 00 records
253     ExtendedAddr = 4,
254     // The address field is 0000 (not used) and the byte count is always 04.
255     // The four data bytes represent a 32-bit address value. In the case of
256     // 80386 and higher CPUs, this address is loaded into the EIP register.
257     StartAddr = 5,
258     // We have no other valid types
259     InvalidType = 6
260   };
261 };
262 
263 // Base class for IHexSectionWriter. This class implements writing algorithm,
264 // but doesn't actually write records. It is used for output buffer size
265 // calculation in IHexWriter::finalize.
266 class IHexSectionWriterBase : public BinarySectionWriter {
267   // 20-bit segment address
268   uint32_t SegmentAddr = 0;
269   // Extended linear address
270   uint32_t BaseAddr = 0;
271 
272   // Write segment address corresponding to 'Addr'
273   uint64_t writeSegmentAddr(uint64_t Addr);
274   // Write extended linear (base) address corresponding to 'Addr'
275   uint64_t writeBaseAddr(uint64_t Addr);
276 
277 protected:
278   // Offset in the output buffer
279   uint64_t Offset = 0;
280 
281   void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
282   virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
283 
284 public:
IHexSectionWriterBase(Buffer & Buf)285   explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
286 
getBufferOffset()287   uint64_t getBufferOffset() const { return Offset; }
288   void visit(const Section &Sec) final;
289   void visit(const OwnedDataSection &Sec) final;
290   void visit(const StringTableSection &Sec) override;
291   void visit(const DynamicRelocationSection &Sec) final;
292   using BinarySectionWriter::visit;
293 };
294 
295 // Real IHEX section writer
296 class IHexSectionWriter : public IHexSectionWriterBase {
297 public:
IHexSectionWriter(Buffer & Buf)298   IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
299 
300   void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
301   void visit(const StringTableSection &Sec) override;
302 };
303 
304 class Writer {
305 protected:
306   Object &Obj;
307   Buffer &Buf;
308 
309 public:
310   virtual ~Writer();
311   virtual Error finalize() = 0;
312   virtual Error write() = 0;
313 
Writer(Object & O,Buffer & B)314   Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {}
315 };
316 
317 template <class ELFT> class ELFWriter : public Writer {
318 private:
319   using Elf_Addr = typename ELFT::Addr;
320   using Elf_Shdr = typename ELFT::Shdr;
321   using Elf_Phdr = typename ELFT::Phdr;
322   using Elf_Ehdr = typename ELFT::Ehdr;
323 
324   void initEhdrSegment();
325 
326   void writeEhdr();
327   void writePhdr(const Segment &Seg);
328   void writeShdr(const SectionBase &Sec);
329 
330   void writePhdrs();
331   void writeShdrs();
332   void writeSectionData();
333   void writeSegmentData();
334 
335   void assignOffsets();
336 
337   std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;
338 
339   size_t totalSize() const;
340 
341 public:
~ELFWriter()342   virtual ~ELFWriter() {}
343   bool WriteSectionHeaders;
344 
345   // For --only-keep-debug, select an alternative section/segment layout
346   // algorithm.
347   bool OnlyKeepDebug;
348 
349   Error finalize() override;
350   Error write() override;
351   ELFWriter(Object &Obj, Buffer &Buf, bool WSH, bool OnlyKeepDebug);
352 };
353 
354 class BinaryWriter : public Writer {
355 private:
356   std::unique_ptr<BinarySectionWriter> SecWriter;
357 
358   uint64_t TotalSize = 0;
359 
360 public:
~BinaryWriter()361   ~BinaryWriter() {}
362   Error finalize() override;
363   Error write() override;
BinaryWriter(Object & Obj,Buffer & Buf)364   BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
365 };
366 
367 class IHexWriter : public Writer {
368   struct SectionCompare {
369     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
370   };
371 
372   std::set<const SectionBase *, SectionCompare> Sections;
373   size_t TotalSize = 0;
374 
375   Error checkSection(const SectionBase &Sec);
376   uint64_t writeEntryPointRecord(uint8_t *Buf);
377   uint64_t writeEndOfFileRecord(uint8_t *Buf);
378 
379 public:
~IHexWriter()380   ~IHexWriter() {}
381   Error finalize() override;
382   Error write() override;
IHexWriter(Object & Obj,Buffer & Buf)383   IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
384 };
385 
// Common base class of all section kinds. It stores the fields shared by
// every section as public data members and declares the virtual hooks that
// concrete sections override. accept() is pure virtual for both visitor
// flavours, so SectionBase itself cannot be instantiated.
class SectionBase {
public:
  std::string Name;
  // Segment this section is attached to, if any.
  Segment *ParentSegment = nullptr;
  uint64_t HeaderOffset = 0;
  uint32_t Index = 0;
  bool HasSymbol = false;

  // Values recorded separately from the current Flags/Type/Offset below.
  // The classof() predicates of the derived classes key off OriginalType and
  // OriginalFlags. OriginalOffset defaults to the largest uint64_t value.
  uint64_t OriginalFlags = 0;
  uint64_t OriginalType = ELF::SHT_NULL;
  uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();

  // Current section properties.
  uint64_t Addr = 0;
  uint64_t Align = 1;
  uint32_t EntrySize = 0;
  uint64_t Flags = 0;
  uint64_t Info = 0;
  uint64_t Link = ELF::SHN_UNDEF;
  uint64_t NameIndex = 0;
  uint64_t Offset = 0;
  uint64_t Size = 0;
  uint64_t Type = ELF::SHT_NULL;
  // Non-owning view of the section's data.
  // NOTE(review): presumably the bytes read from the input -- confirm.
  ArrayRef<uint8_t> OriginalData;

  SectionBase() = default;
  SectionBase(const SectionBase &) = default;

  virtual ~SectionBase() = default;

  virtual void initialize(SectionTableRef SecTable);
  virtual void finalize();
  // Remove references to these sections. The list of sections must be sorted.
  virtual Error
  removeSectionReferences(bool AllowBrokenLinks,
                          function_ref<bool(const SectionBase *)> ToRemove);
  virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
  // Double-dispatch entry points for the read-only and mutable visitors.
  virtual void accept(SectionVisitor &Visitor) const = 0;
  virtual void accept(MutableSectionVisitor &Visitor) = 0;
  virtual void markSymbols();
  virtual void
  replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
  // Notify the section that it is subject to removal.
  virtual void onRemove();
};
430 
431 class Segment {
432 private:
433   struct SectionCompare {
operatorSectionCompare434     bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
435       // Some sections might have the same address if one of them is empty. To
436       // fix this we can use the lexicographic ordering on ->Addr and the
437       // address of the actully stored section.
438       if (Lhs->OriginalOffset == Rhs->OriginalOffset)
439         return Lhs < Rhs;
440       return Lhs->OriginalOffset < Rhs->OriginalOffset;
441     }
442   };
443 
444 public:
445   uint32_t Type = 0;
446   uint32_t Flags = 0;
447   uint64_t Offset = 0;
448   uint64_t VAddr = 0;
449   uint64_t PAddr = 0;
450   uint64_t FileSize = 0;
451   uint64_t MemSize = 0;
452   uint64_t Align = 0;
453 
454   uint32_t Index = 0;
455   uint64_t OriginalOffset = 0;
456   Segment *ParentSegment = nullptr;
457   ArrayRef<uint8_t> Contents;
458   std::set<const SectionBase *, SectionCompare> Sections;
459 
Segment(ArrayRef<uint8_t> Data)460   explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
461   Segment() = default;
462 
firstSection()463   const SectionBase *firstSection() const {
464     if (!Sections.empty())
465       return *Sections.begin();
466     return nullptr;
467   }
468 
removeSection(const SectionBase * Sec)469   void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
addSection(const SectionBase * Sec)470   void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
471 
getContents()472   ArrayRef<uint8_t> getContents() const { return Contents; }
473 };
474 
475 class Section : public SectionBase {
476   MAKE_SEC_WRITER_FRIEND
477 
478   ArrayRef<uint8_t> Contents;
479   SectionBase *LinkSection = nullptr;
480 
481 public:
Section(ArrayRef<uint8_t> Data)482   explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
483 
484   void accept(SectionVisitor &Visitor) const override;
485   void accept(MutableSectionVisitor &Visitor) override;
486   Error removeSectionReferences(bool AllowBrokenLinks,
487       function_ref<bool(const SectionBase *)> ToRemove) override;
488   void initialize(SectionTableRef SecTable) override;
489   void finalize() override;
490 };
491 
492 class OwnedDataSection : public SectionBase {
493   MAKE_SEC_WRITER_FRIEND
494 
495   std::vector<uint8_t> Data;
496 
497 public:
OwnedDataSection(StringRef SecName,ArrayRef<uint8_t> Data)498   OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
499       : Data(std::begin(Data), std::end(Data)) {
500     Name = SecName.str();
501     Type = OriginalType = ELF::SHT_PROGBITS;
502     Size = Data.size();
503     OriginalOffset = std::numeric_limits<uint64_t>::max();
504   }
505 
OwnedDataSection(const Twine & SecName,uint64_t SecAddr,uint64_t SecFlags,uint64_t SecOff)506   OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
507                    uint64_t SecOff) {
508     Name = SecName.str();
509     Type = OriginalType = ELF::SHT_PROGBITS;
510     Addr = SecAddr;
511     Flags = OriginalFlags = SecFlags;
512     OriginalOffset = SecOff;
513   }
514 
515   void appendHexData(StringRef HexData);
516   void accept(SectionVisitor &Sec) const override;
517   void accept(MutableSectionVisitor &Visitor) override;
518 };
519 
520 class CompressedSection : public SectionBase {
521   MAKE_SEC_WRITER_FRIEND
522 
523   DebugCompressionType CompressionType;
524   uint64_t DecompressedSize;
525   uint64_t DecompressedAlign;
526   SmallVector<char, 128> CompressedData;
527 
528 public:
529   CompressedSection(const SectionBase &Sec,
530                     DebugCompressionType CompressionType);
531   CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
532                     uint64_t DecompressedAlign);
533 
getDecompressedSize()534   uint64_t getDecompressedSize() const { return DecompressedSize; }
getDecompressedAlign()535   uint64_t getDecompressedAlign() const { return DecompressedAlign; }
536 
537   void accept(SectionVisitor &Visitor) const override;
538   void accept(MutableSectionVisitor &Visitor) override;
539 
classof(const SectionBase * S)540   static bool classof(const SectionBase *S) {
541     return (S->OriginalFlags & ELF::SHF_COMPRESSED) ||
542            (StringRef(S->Name).startswith(".zdebug"));
543   }
544 };
545 
546 class DecompressedSection : public SectionBase {
547   MAKE_SEC_WRITER_FRIEND
548 
549 public:
DecompressedSection(const CompressedSection & Sec)550   explicit DecompressedSection(const CompressedSection &Sec)
551       : SectionBase(Sec) {
552     Size = Sec.getDecompressedSize();
553     Align = Sec.getDecompressedAlign();
554     Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
555     if (StringRef(Name).startswith(".zdebug"))
556       Name = "." + Name.substr(2);
557   }
558 
559   void accept(SectionVisitor &Visitor) const override;
560   void accept(MutableSectionVisitor &Visitor) override;
561 };
562 
563 // There are two types of string tables that can exist, dynamic and not dynamic.
564 // In the dynamic case the string table is allocated. Changing a dynamic string
565 // table would mean altering virtual addresses and thus the memory image. So
566 // dynamic string tables should not have an interface to modify them or
567 // reconstruct them. This type lets us reconstruct a string table. To avoid
568 // this class being used for dynamic string tables (which has happened) the
569 // classof method checks that the particular instance is not allocated. This
570 // then agrees with the makeSection method used to construct most sections.
571 class StringTableSection : public SectionBase {
572   MAKE_SEC_WRITER_FRIEND
573 
574   StringTableBuilder StrTabBuilder;
575 
576 public:
StringTableSection()577   StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
578     Type = OriginalType = ELF::SHT_STRTAB;
579   }
580 
581   void addString(StringRef Name);
582   uint32_t findIndex(StringRef Name) const;
583   void prepareForLayout();
584   void accept(SectionVisitor &Visitor) const override;
585   void accept(MutableSectionVisitor &Visitor) override;
586 
classof(const SectionBase * S)587   static bool classof(const SectionBase *S) {
588     if (S->OriginalFlags & ELF::SHF_ALLOC)
589       return false;
590     return S->OriginalType == ELF::SHT_STRTAB;
591   }
592 };
593 
594 // Symbols have a st_shndx field that normally stores an index but occasionally
595 // stores a different special value. This enum keeps track of what the st_shndx
596 // field means. Most of the values are just copies of the special SHN_* values.
597 // SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section.
enum SymbolShndxType {
  // The symbol's section index is an ordinary section index.
  SYMBOL_SIMPLE_INDEX = 0,
  // Every other enumerator takes the value of the ELF::SHN_* constant named
  // in its initializer.
  SYMBOL_ABS = ELF::SHN_ABS,
  SYMBOL_COMMON = ELF::SHN_COMMON,
  SYMBOL_LOPROC = ELF::SHN_LOPROC,
  SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
  SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
  SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
  SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
  SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
  SYMBOL_HIPROC = ELF::SHN_HIPROC,
  SYMBOL_LOOS = ELF::SHN_LOOS,
  SYMBOL_HIOS = ELF::SHN_HIOS,
  SYMBOL_XINDEX = ELF::SHN_XINDEX,
};
613 
614 struct Symbol {
615   uint8_t Binding;
616   SectionBase *DefinedIn = nullptr;
617   SymbolShndxType ShndxType;
618   uint32_t Index;
619   std::string Name;
620   uint32_t NameIndex;
621   uint64_t Size;
622   uint8_t Type;
623   uint64_t Value;
624   uint8_t Visibility;
625   bool Referenced = false;
626 
627   uint16_t getShndx() const;
628   bool isCommon() const;
629 };
630 
631 class SectionIndexSection : public SectionBase {
632   MAKE_SEC_WRITER_FRIEND
633 
634 private:
635   std::vector<uint32_t> Indexes;
636   SymbolTableSection *Symbols = nullptr;
637 
638 public:
~SectionIndexSection()639   virtual ~SectionIndexSection() {}
addIndex(uint32_t Index)640   void addIndex(uint32_t Index) {
641     assert(Size > 0);
642     Indexes.push_back(Index);
643   }
644 
reserve(size_t NumSymbols)645   void reserve(size_t NumSymbols) {
646     Indexes.reserve(NumSymbols);
647     Size = NumSymbols * 4;
648   }
setSymTab(SymbolTableSection * SymTab)649   void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
650   void initialize(SectionTableRef SecTable) override;
651   void finalize() override;
652   void accept(SectionVisitor &Visitor) const override;
653   void accept(MutableSectionVisitor &Visitor) override;
654 
SectionIndexSection()655   SectionIndexSection() {
656     Name = ".symtab_shndx";
657     Align = 4;
658     EntrySize = 4;
659     Type = OriginalType = ELF::SHT_SYMTAB_SHNDX;
660   }
661 };
662 
663 class SymbolTableSection : public SectionBase {
664   MAKE_SEC_WRITER_FRIEND
665 
setStrTab(StringTableSection * StrTab)666   void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
667   void assignIndices();
668 
669 protected:
670   std::vector<std::unique_ptr<Symbol>> Symbols;
671   StringTableSection *SymbolNames = nullptr;
672   SectionIndexSection *SectionIndexTable = nullptr;
673 
674   using SymPtr = std::unique_ptr<Symbol>;
675 
676 public:
SymbolTableSection()677   SymbolTableSection() { Type = OriginalType = ELF::SHT_SYMTAB; }
678 
679   void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
680                  uint64_t Value, uint8_t Visibility, uint16_t Shndx,
681                  uint64_t SymbolSize);
682   void prepareForLayout();
683   // An 'empty' symbol table still contains a null symbol.
empty()684   bool empty() const { return Symbols.size() == 1; }
setShndxTable(SectionIndexSection * ShndxTable)685   void setShndxTable(SectionIndexSection *ShndxTable) {
686     SectionIndexTable = ShndxTable;
687   }
getShndxTable()688   const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
689   void fillShndxTable();
getStrTab()690   const SectionBase *getStrTab() const { return SymbolNames; }
691   const Symbol *getSymbolByIndex(uint32_t Index) const;
692   Symbol *getSymbolByIndex(uint32_t Index);
693   void updateSymbols(function_ref<void(Symbol &)> Callable);
694 
695   Error removeSectionReferences(bool AllowBrokenLinks,
696       function_ref<bool(const SectionBase *)> ToRemove) override;
697   void initialize(SectionTableRef SecTable) override;
698   void finalize() override;
699   void accept(SectionVisitor &Visitor) const override;
700   void accept(MutableSectionVisitor &Visitor) override;
701   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
702   void replaceSectionReferences(
703       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
704 
classof(const SectionBase * S)705   static bool classof(const SectionBase *S) {
706     return S->OriginalType == ELF::SHT_SYMTAB;
707   }
708 };
709 
710 struct Relocation {
711   Symbol *RelocSymbol = nullptr;
712   uint64_t Offset;
713   uint64_t Addend;
714   uint32_t Type;
715 };
716 
717 // All relocation sections denote relocations to apply to another section.
718 // However, some relocation sections use a dynamic symbol table and others use
719 // a regular symbol table. Because the types of the two symbol tables differ in
720 // our system (because they should behave differently) we can't uniformly
721 // represent all relocations with the same base class if we expose an interface
722 // that mentions the symbol table type. So we split the two base types into two
723 // different classes, one which handles the section the relocation is applied to
724 // and another which handles the symbol table type. The symbol table type is
725 // taken as a type parameter to the class (see RelocSectionWithSymtabBase).
726 class RelocationSectionBase : public SectionBase {
727 protected:
728   SectionBase *SecToApplyRel = nullptr;
729 
730 public:
getSection()731   const SectionBase *getSection() const { return SecToApplyRel; }
setSection(SectionBase * Sec)732   void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
733 
classof(const SectionBase * S)734   static bool classof(const SectionBase *S) {
735     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
736   }
737 };
738 
739 // Takes the symbol table type to use as a parameter so that we can deduplicate
740 // that code between the two symbol table types.
741 template <class SymTabType>
742 class RelocSectionWithSymtabBase : public RelocationSectionBase {
setSymTab(SymTabType * SymTab)743   void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
744 
745 protected:
746   RelocSectionWithSymtabBase() = default;
747 
748   SymTabType *Symbols = nullptr;
749 
750 public:
751   void initialize(SectionTableRef SecTable) override;
752   void finalize() override;
753 };
754 
755 class RelocationSection
756     : public RelocSectionWithSymtabBase<SymbolTableSection> {
757   MAKE_SEC_WRITER_FRIEND
758 
759   std::vector<Relocation> Relocations;
760 
761 public:
addRelocation(Relocation Rel)762   void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
763   void accept(SectionVisitor &Visitor) const override;
764   void accept(MutableSectionVisitor &Visitor) override;
765   Error removeSectionReferences(bool AllowBrokenLinks,
766       function_ref<bool(const SectionBase *)> ToRemove) override;
767   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
768   void markSymbols() override;
769   void replaceSectionReferences(
770       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
771 
classof(const SectionBase * S)772   static bool classof(const SectionBase *S) {
773     if (S->OriginalFlags & ELF::SHF_ALLOC)
774       return false;
775     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
776   }
777 };
778 
779 // TODO: The way stripping and groups interact is complicated
780 // and still needs to be worked on.
781 
782 class GroupSection : public SectionBase {
783   MAKE_SEC_WRITER_FRIEND
784   const SymbolTableSection *SymTab = nullptr;
785   Symbol *Sym = nullptr;
786   ELF::Elf32_Word FlagWord;
787   SmallVector<SectionBase *, 3> GroupMembers;
788 
789 public:
790   // TODO: Contents is present in several classes of the hierarchy.
791   // This needs to be refactored to avoid duplication.
792   ArrayRef<uint8_t> Contents;
793 
GroupSection(ArrayRef<uint8_t> Data)794   explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
795 
setSymTab(const SymbolTableSection * SymTabSec)796   void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
setSymbol(Symbol * S)797   void setSymbol(Symbol *S) { Sym = S; }
setFlagWord(ELF::Elf32_Word W)798   void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
addMember(SectionBase * Sec)799   void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
800 
801   void accept(SectionVisitor &) const override;
802   void accept(MutableSectionVisitor &Visitor) override;
803   void finalize() override;
804   Error removeSectionReferences(
805       bool AllowBrokenLinks,
806       function_ref<bool(const SectionBase *)> ToRemove) override;
807   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
808   void markSymbols() override;
809   void replaceSectionReferences(
810       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
811   void onRemove() override;
812 
classof(const SectionBase * S)813   static bool classof(const SectionBase *S) {
814     return S->OriginalType == ELF::SHT_GROUP;
815   }
816 };
817 
818 class DynamicSymbolTableSection : public Section {
819 public:
DynamicSymbolTableSection(ArrayRef<uint8_t> Data)820   explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
821 
classof(const SectionBase * S)822   static bool classof(const SectionBase *S) {
823     return S->OriginalType == ELF::SHT_DYNSYM;
824   }
825 };
826 
827 class DynamicSection : public Section {
828 public:
DynamicSection(ArrayRef<uint8_t> Data)829   explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
830 
classof(const SectionBase * S)831   static bool classof(const SectionBase *S) {
832     return S->OriginalType == ELF::SHT_DYNAMIC;
833   }
834 };
835 
836 class DynamicRelocationSection
837     : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
838   MAKE_SEC_WRITER_FRIEND
839 
840 private:
841   ArrayRef<uint8_t> Contents;
842 
843 public:
DynamicRelocationSection(ArrayRef<uint8_t> Data)844   explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
845 
846   void accept(SectionVisitor &) const override;
847   void accept(MutableSectionVisitor &Visitor) override;
848   Error removeSectionReferences(
849       bool AllowBrokenLinks,
850       function_ref<bool(const SectionBase *)> ToRemove) override;
851 
classof(const SectionBase * S)852   static bool classof(const SectionBase *S) {
853     if (!(S->OriginalFlags & ELF::SHF_ALLOC))
854       return false;
855     return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
856   }
857 };
858 
859 class GnuDebugLinkSection : public SectionBase {
860   MAKE_SEC_WRITER_FRIEND
861 
862 private:
863   StringRef FileName;
864   uint32_t CRC32;
865 
866   void init(StringRef File);
867 
868 public:
869   // If we add this section from an external source we can use this ctor.
870   explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
871   void accept(SectionVisitor &Visitor) const override;
872   void accept(MutableSectionVisitor &Visitor) override;
873 };
874 
875 class Reader {
876 public:
877   virtual ~Reader();
878   virtual std::unique_ptr<Object> create(bool EnsureSymtab) const = 0;
879 };
880 
881 using object::Binary;
882 using object::ELFFile;
883 using object::ELFObjectFile;
884 using object::OwningBinary;
885 
886 class BasicELFBuilder {
887 protected:
888   std::unique_ptr<Object> Obj;
889 
890   void initFileHeader();
891   void initHeaderSegment();
892   StringTableSection *addStrTab();
893   SymbolTableSection *addSymTab(StringTableSection *StrTab);
894   void initSections();
895 
896 public:
BasicELFBuilder()897   BasicELFBuilder() : Obj(std::make_unique<Object>()) {}
898 };
899 
900 class BinaryELFBuilder : public BasicELFBuilder {
901   MemoryBuffer *MemBuf;
902   uint8_t NewSymbolVisibility;
903   void addData(SymbolTableSection *SymTab);
904 
905 public:
BinaryELFBuilder(MemoryBuffer * MB,uint8_t NewSymbolVisibility)906   BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility)
907       : BasicELFBuilder(), MemBuf(MB),
908         NewSymbolVisibility(NewSymbolVisibility) {}
909 
910   std::unique_ptr<Object> build();
911 };
912 
913 class IHexELFBuilder : public BasicELFBuilder {
914   const std::vector<IHexRecord> &Records;
915 
916   void addDataSections();
917 
918 public:
IHexELFBuilder(const std::vector<IHexRecord> & Records)919   IHexELFBuilder(const std::vector<IHexRecord> &Records)
920       : BasicELFBuilder(), Records(Records) {}
921 
922   std::unique_ptr<Object> build();
923 };
924 
925 template <class ELFT> class ELFBuilder {
926 private:
927   using Elf_Addr = typename ELFT::Addr;
928   using Elf_Shdr = typename ELFT::Shdr;
929   using Elf_Word = typename ELFT::Word;
930 
931   const ELFFile<ELFT> &ElfFile;
932   Object &Obj;
933   size_t EhdrOffset = 0;
934   Optional<StringRef> ExtractPartition;
935 
936   void setParentSegment(Segment &Child);
937   void readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
938   void initGroupSection(GroupSection *GroupSec);
939   void initSymbolTable(SymbolTableSection *SymTab);
940   void readSectionHeaders();
941   void readSections(bool EnsureSymtab);
942   void findEhdrOffset();
943   SectionBase &makeSection(const Elf_Shdr &Shdr);
944 
945 public:
ELFBuilder(const ELFObjectFile<ELFT> & ElfObj,Object & Obj,Optional<StringRef> ExtractPartition)946   ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
947              Optional<StringRef> ExtractPartition)
948       : ElfFile(*ElfObj.getELFFile()), Obj(Obj),
949         ExtractPartition(ExtractPartition) {}
950 
951   void build(bool EnsureSymtab);
952 };
953 
954 class BinaryReader : public Reader {
955   MemoryBuffer *MemBuf;
956   uint8_t NewSymbolVisibility;
957 
958 public:
BinaryReader(MemoryBuffer * MB,const uint8_t NewSymbolVisibility)959   BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility)
960       : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {}
961   std::unique_ptr<Object> create(bool EnsureSymtab) const override;
962 };
963 
964 class IHexReader : public Reader {
965   MemoryBuffer *MemBuf;
966 
967   Expected<std::vector<IHexRecord>> parse() const;
parseError(size_t LineNo,Error E)968   Error parseError(size_t LineNo, Error E) const {
969     return LineNo == -1U
970                ? createFileError(MemBuf->getBufferIdentifier(), std::move(E))
971                : createFileError(MemBuf->getBufferIdentifier(), LineNo,
972                                  std::move(E));
973   }
974   template <typename... Ts>
parseError(size_t LineNo,char const * Fmt,const Ts &...Vals)975   Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const {
976     Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
977     return parseError(LineNo, std::move(E));
978   }
979 
980 public:
IHexReader(MemoryBuffer * MB)981   IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
982 
983   std::unique_ptr<Object> create(bool EnsureSymtab) const override;
984 };
985 
986 class ELFReader : public Reader {
987   Binary *Bin;
988   Optional<StringRef> ExtractPartition;
989 
990 public:
991   std::unique_ptr<Object> create(bool EnsureSymtab) const override;
ELFReader(Binary * B,Optional<StringRef> ExtractPartition)992   explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
993       : Bin(B), ExtractPartition(ExtractPartition) {}
994 };
995 
996 class Object {
997 private:
998   using SecPtr = std::unique_ptr<SectionBase>;
999   using SegPtr = std::unique_ptr<Segment>;
1000 
1001   std::vector<SecPtr> Sections;
1002   std::vector<SegPtr> Segments;
1003   std::vector<SecPtr> RemovedSections;
1004 
sectionIsAlloc(const SectionBase & Sec)1005   static bool sectionIsAlloc(const SectionBase &Sec) {
1006     return Sec.Flags & ELF::SHF_ALLOC;
1007   };
1008 
1009 public:
1010   template <class T>
1011   using Range = iterator_range<
1012       pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>;
1013 
1014   template <class T>
1015   using ConstRange = iterator_range<pointee_iterator<
1016       typename std::vector<std::unique_ptr<T>>::const_iterator>>;
1017 
1018   // It is often the case that the ELF header and the program header table are
1019   // not present in any segment. This could be a problem during file layout,
1020   // because other segments may get assigned an offset where either of the
1021   // two should reside, which will effectively corrupt the resulting binary.
1022   // Other than that we use these segments to track program header offsets
1023   // when they may not follow the ELF header.
1024   Segment ElfHdrSegment;
1025   Segment ProgramHdrSegment;
1026 
1027   uint8_t OSABI;
1028   uint8_t ABIVersion;
1029   uint64_t Entry;
1030   uint64_t SHOff;
1031   uint32_t Type;
1032   uint32_t Machine;
1033   uint32_t Version;
1034   uint32_t Flags;
1035 
1036   bool HadShdrs = true;
1037   bool MustBeRelocatable = false;
1038   StringTableSection *SectionNames = nullptr;
1039   SymbolTableSection *SymbolTable = nullptr;
1040   SectionIndexSection *SectionIndexTable = nullptr;
1041 
1042   void sortSections();
sections()1043   SectionTableRef sections() { return SectionTableRef(Sections); }
sections()1044   ConstRange<SectionBase> sections() const {
1045     return make_pointee_range(Sections);
1046   }
1047   iterator_range<
1048       filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
1049                       decltype(&sectionIsAlloc)>>
allocSections()1050   allocSections() const {
1051     return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
1052   }
1053 
findSection(StringRef Name)1054   SectionBase *findSection(StringRef Name) {
1055     auto SecIt =
1056         find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
1057     return SecIt == Sections.end() ? nullptr : SecIt->get();
1058   }
removedSections()1059   SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
1060 
segments()1061   Range<Segment> segments() { return make_pointee_range(Segments); }
segments()1062   ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
1063 
1064   Error removeSections(bool AllowBrokenLinks,
1065                        std::function<bool(const SectionBase &)> ToRemove);
1066   Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
addSection(Ts &&...Args)1067   template <class T, class... Ts> T &addSection(Ts &&... Args) {
1068     auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
1069     auto Ptr = Sec.get();
1070     MustBeRelocatable |= isa<RelocationSection>(*Ptr);
1071     Sections.emplace_back(std::move(Sec));
1072     Ptr->Index = Sections.size();
1073     return *Ptr;
1074   }
1075   void addNewSymbolTable();
addSegment(ArrayRef<uint8_t> Data)1076   Segment &addSegment(ArrayRef<uint8_t> Data) {
1077     Segments.emplace_back(std::make_unique<Segment>(Data));
1078     return *Segments.back();
1079   }
isRelocatable()1080   bool isRelocatable() const {
1081     return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
1082   }
1083 };
1084 
1085 } // end namespace elf
1086 } // end namespace objcopy
1087 } // end namespace llvm
1088 
1089 #endif // LLVM_TOOLS_OBJCOPY_OBJECT_H
1090