1 //===- EhFrame.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_EH_FRAME_H
10 #define LLD_MACHO_EH_FRAME_H
11 
12 #include "InputSection.h"
13 #include "Relocations.h"
14 
15 #include "lld/Common/LLVM.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/PointerUnion.h"
18 #include "llvm/ADT/SmallVector.h"
19 
20 /*
21  * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
22  * is closely coupled with other file parsing logic; EhFrame.h just contains a
23  * few helpers.
24  */
25 
26 /*
27  * === The EH frame format ===
28  *
29  * EH frames can either be Common Information Entries (CIEs) or Frame
30  * Description Entries (FDEs). CIEs contain information that is common amongst
31  * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
32  * entries together form a forest of two-level trees, with CIEs as the roots
33  * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
34  * to it.
35  *
36  * A CIE comprises the following fields in order:
37  * 1.   Length of the entry (4 or 12 bytes)
38  * 2.   CIE offset (4 bytes; always 0 for CIEs)
39  * 3.   CIE version (byte)
40  * 4.   Null-terminated augmentation string
41  * 5-8. LEB128 values that we don't care about
42  * 9.   Augmentation data, to be interpreted using the aug string
43  * 10.  DWARF instructions (ignored by LLD)
44  *
 * An FDE comprises the following fields in order:
 * 1. Length of the entry (4 or 12 bytes)
 * 2. CIE offset (4-byte pcrel offset that points backwards to this FDE's CIE)
48  * 3. Function address (pointer-sized pcrel offset)
49  * 4. (Optional) Augmentation data length
50  * 5. (Optional) LSDA address (pointer-sized pcrel offset)
51  * 6. DWARF instructions (ignored by LLD)
52  */
53 namespace lld {
54 namespace macho {
55 
56 class EhReader {
57 public:
58   EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
59            size_t wordSize)
60       : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
61   size_t size() const { return data.size(); }
62   // Read and validate the length field.
63   uint64_t readLength(size_t *off) const;
64   // Skip the length field without doing validation.
65   void skipValidLength(size_t *off) const;
66   uint8_t readByte(size_t *off) const;
67   uint32_t readU32(size_t *off) const;
68   uint64_t readPointer(size_t *off) const;
69   StringRef readString(size_t *off) const;
70   void skipLeb128(size_t *off) const;
71   void failOn(size_t errOff, const Twine &msg) const;
72 
73 private:
74   const ObjFile *file;
75   ArrayRef<uint8_t> data;
76   // The offset of the data array within its section. Used only for error
77   // reporting.
78   const size_t dataOff;
79   size_t wordSize;
80 };
81 
82 // The EH frame format, when emitted by llvm-mc, consists of a number of
83 // "abs-ified" relocations, i.e. relocations that are implicitly encoded as
84 // pcrel offsets in the section data. The offsets refer to the locations of
85 // symbols in the input object file. When we ingest these EH frames, we convert
86 // these implicit relocations into explicit Relocs.
87 //
88 // These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
89 // However, we need this operation to be cross-platform, and ARM does not have a
90 // similar relocation that is applicable. We therefore use the more verbose (but
91 // more generic) subtractor relocation to encode these pcrel values. ld64
92 // appears to do something similar -- its `-r` output contains these explicit
93 // subtractor relocations.
94 class EhRelocator {
95 public:
96   EhRelocator(InputSection *isec) : isec(isec) {}
97 
98   // For the next two methods, let `PC` denote `isec address + off`.
99   // Create relocs writing the value of target - PC to PC.
100   void makePcRel(uint64_t off,
101                  llvm::PointerUnion<Symbol *, InputSection *> target,
102                  uint8_t length);
103   // Create relocs writing the value of PC - target to PC.
104   void makeNegativePcRel(uint64_t off,
105                          llvm::PointerUnion<Symbol *, InputSection *> target,
106                          uint8_t length);
107   // Insert the new relocations into isec->relocs.
108   void commit();
109 
110 private:
111   InputSection *isec;
112   // Insert new relocs here so that we don't invalidate iterators into the
113   // existing relocs vector.
114   SmallVector<Reloc, 6> newRelocs;
115 };
116 
117 } // namespace macho
118 } // namespace lld
119 
120 #endif
121