//===- EhFrame.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_MACHO_EH_FRAME_H
#define LLD_MACHO_EH_FRAME_H

#include "InputSection.h"
#include "Relocations.h"

#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"

/*
 * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
 * is closely coupled with other file parsing logic; EhFrame.h just contains a
 * few helpers.
 */

/*
 * === The EH frame format ===
 *
 * EH frames can either be Common Information Entries (CIEs) or Frame
 * Description Entries (FDEs). CIEs contain information that is common amongst
 * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
 * entries together form a forest of two-level trees, with CIEs as the roots
 * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
 * to it.
 *
 * A CIE comprises the following fields in order:
 * 1. Length of the entry (4 or 12 bytes)
 * 2. CIE offset (4 bytes; always 0 for CIEs)
 * 3. CIE version (byte)
 * 4. Null-terminated augmentation string
 * 5-8. LEB128 values that we don't care about
 * 9. Augmentation data, to be interpreted using the aug string
 * 10. DWARF instructions (ignored by LLD)
 *
 * An FDE comprises the following:
 * 1. Length of the entry (4 or 12 bytes)
 * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
 * 3. Function address (pointer-sized pcrel offset)
 * 4. (Optional) Augmentation data length
 * 5.
(Optional) LSDA address (pointer-sized pcrel offset)
 * 6. DWARF instructions (ignored by LLD)
 */
namespace lld {
namespace macho {

// Helper for pulling individual fields out of a byte array of EH frame data.
//
// Each read/skip helper receives a pointer to a caller-owned offset into the
// byte array. Only the declarations are visible in this header; presumably
// every helper advances `*off` past whatever it consumed -- confirm against
// the out-of-line definitions.
class EhReader {
public:
  // `dataOff` is the position of `data` inside its section and is used only
  // for error reporting.
  // NOTE(review): `file` is presumably the object file that `data` belongs
  // to, and `wordSize` the pointer width consumed by readPointer() -- confirm
  // against the callers and definitions.
  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
           size_t wordSize)
      : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}

  // Number of bytes in the wrapped data array.
  size_t size() const { return data.size(); }

  // Length field handling: readLength() reads the field and validates it,
  // whereas skipValidLength() steps over it without doing any validation.
  uint64_t readLength(size_t *off) const;
  void skipValidLength(size_t *off) const;

  // Scalar reads at `*off`.
  uint8_t readByte(size_t *off) const;
  uint32_t readU32(size_t *off) const;
  uint64_t readPointer(size_t *off) const;

  // String read and LEB128 skip at `*off`.
  StringRef readString(size_t *off) const;
  void skipLeb128(size_t *off) const;

  // Failure hook taking an offset and a message. Declared only; presumably it
  // reports `msg` for the location `errOff` -- confirm against the definition.
  void failOn(size_t errOff, const Twine &msg) const;

private:
  const ObjFile *file;
  ArrayRef<uint8_t> data;
  // Where the data array starts within its section. Used only for error
  // reporting.
  const size_t dataOff;
  size_t wordSize;
};

// The EH frame format, when emitted by llvm-mc, consists of a number of
// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
// pcrel offsets in the section data. The offsets refer to the locations of
// symbols in the input object file. When we ingest these EH frames, we convert
// these implicit relocations into explicit Relocs.
//
// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
// However, we need this operation to be cross-platform, and ARM does not have a
// similar relocation that is applicable. We therefore use the more verbose (but
// more generic) subtractor relocation to encode these pcrel values. ld64
// appears to do something similar -- its `-r` output contains these explicit
// subtractor relocations.
94 class EhRelocator { 95 public: 96 EhRelocator(InputSection *isec) : isec(isec) {} 97 98 // For the next two methods, let `PC` denote `isec address + off`. 99 // Create relocs writing the value of target - PC to PC. 100 void makePcRel(uint64_t off, 101 llvm::PointerUnion<Symbol *, InputSection *> target, 102 uint8_t length); 103 // Create relocs writing the value of PC - target to PC. 104 void makeNegativePcRel(uint64_t off, 105 llvm::PointerUnion<Symbol *, InputSection *> target, 106 uint8_t length); 107 // Insert the new relocations into isec->relocs. 108 void commit(); 109 110 private: 111 InputSection *isec; 112 // Insert new relocs here so that we don't invalidate iterators into the 113 // existing relocs vector. 114 SmallVector<Reloc, 6> newRelocs; 115 }; 116 117 } // namespace macho 118 } // namespace lld 119 120 #endif 121