1 // Copyright 2014 Renato Tegon Forti, Antony Polukhin.
2 // Copyright 2015-2020 Antony Polukhin.
3 //
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt
6 // or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_DLL_DETAIL_MACHO_INFO_HPP
9 #define BOOST_DLL_DETAIL_MACHO_INFO_HPP
10 
11 #include <boost/dll/config.hpp>
12 
13 #ifdef BOOST_HAS_PRAGMA_ONCE
14 # pragma once
15 #endif
16 
#include <algorithm>
#include <cstddef> // for std::size_t
#include <fstream>
#include <string> // for std::getline
#include <vector>
20 
21 #include <boost/cstdint.hpp>
22 
23 namespace boost { namespace dll { namespace detail {
24 
// Scalar types that the Mach-O structures below are declared with.
// All of them are plain `int` here.
typedef int integer_t;
typedef int vm_prot_t;          // type of segment_command_template::maxprot / initprot
typedef integer_t cpu_type_t;   // type of mach_header_template::cputype
typedef integer_t cpu_subtype_t; // type of mach_header_template::cpusubtype
29 
// Header that macho_info reads from offset 0 of the file.
// `AddressOffsetT` selects the layout: boost::uint32_t for the 32-bit variant,
// boost::uint64_t for the 64-bit variant (see the typedefs below).
// The load commands start right after this structure, at offset sizeof(mach_header_template).
template <class AddressOffsetT>
struct mach_header_template {
    boost::uint32_t     magic;          // magic number; macho_info::parsing_supported() compares it with 0xfeedface / 0xfeedfacf
    cpu_type_t          cputype;
    cpu_subtype_t       cpusubtype;
    boost::uint32_t     filetype;
    boost::uint32_t     ncmds;          // number of load commands following the header
    boost::uint32_t     sizeofcmds;
    // One element in the 32-bit layout, two elements in the 64-bit layout.
    boost::uint32_t     flags[sizeof(AddressOffsetT) / sizeof(uint32_t)]; // Flags and reserved
};

typedef mach_header_template<boost::uint32_t> mach_header_32_;
typedef mach_header_template<boost::uint64_t> mach_header_64_;
43 
// Common prefix of every load command.
// macho_info::command_finder() reads it to decide whether a command is of interest
// and advances by `cmdsize` bytes to reach the next command.
struct load_command_ {
    boost::uint32_t        cmd;        /* type of command */
    boost::uint32_t        cmdsize;    /* size of the whole command in bytes, this prefix included */
};
48 
// Values of the `cmd` field of a load command.
// Within this file only LC_SEGMENT_, LC_SEGMENT_64_ and LC_SYMTAB_ are used by macho_info.
struct load_command_types {
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_          = 0x1);   /* segment of this file to be mapped */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SYMTAB_           = 0x2);   /* link-edit stab symbol table info */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SYMSEG_           = 0x3);   /* link-edit gdb symbol table info (obsolete) */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_THREAD_           = 0x4);   /* thread */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_UNIXTHREAD_       = 0x5);   /* unix thread (includes a stack) */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOADFVMLIB_       = 0x6);   /* load a specified fixed VM shared library */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_IDFVMLIB_         = 0x7);   /* fixed VM shared library identification */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_IDENT_            = 0x8);   /* object identification info (obsolete) */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_FVMFILE_          = 0x9);   /* fixed VM file inclusion (internal use) */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREPAGE_          = 0xa);   /* prepage command (internal use) */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYSYMTAB_         = 0xb);   /* dynamic link-edit symbol table info */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_DYLIB_       = 0xc);   /* load a dynamically linked shared library */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ID_DYLIB_         = 0xd);   /* dynamically linked shared lib ident */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_DYLINKER_    = 0xe);   /* load a dynamic linker */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ID_DYLINKER_      = 0xf);   /* dynamic linker identification */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREBOUND_DYLIB_   = 0x10);  /* modules prebound for a dynamically linked shared library */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ROUTINES_         = 0x11);  /* image routines */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_FRAMEWORK_    = 0x12);  /* sub framework */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_UMBRELLA_     = 0x13);  /* sub umbrella */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_CLIENT_       = 0x14);  /* sub client */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SUB_LIBRARY_      = 0x15);  /* sub library */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_TWOLEVEL_HINTS_   = 0x16);  /* two-level namespace lookup hints */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_PREBIND_CKSUM_    = 0x17);  /* prebind checksum */
/*
 * After MacOS X 10.1, when a new load command is added that is required to be
 * understood by the dynamic linker for the image to execute properly, the
 * LC_REQ_DYLD bit will be or'ed into the load command constant.  If the dynamic
 * linker sees such a load command that it does not understand, it will issue an
 * "unknown load command required for execution" error and refuse to use the
 * image.  Other load commands without this bit that are not understood will
 * simply be ignored.
 */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_REQ_DYLD_         = 0x80000000);

/*
 * Load a dynamically linked shared library that is allowed to be missing
 * (all symbols are weak imported).
 */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LOAD_WEAK_DYLIB_  = (0x18 | LC_REQ_DYLD_));

    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_64_       = 0x19);                    /* 64-bit segment of this file to be mapped */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ROUTINES_64_      = 0x1a);                    /* 64-bit image routines */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_UUID_             = 0x1b);                    /* the uuid */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_RPATH_            = (0x1c | LC_REQ_DYLD_));   /* runpath additions */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_CODE_SIGNATURE_   = 0x1d);                    /* location of code signature */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_SEGMENT_SPLIT_INFO_= 0x1e);                   /* location of info to split segments */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_REEXPORT_DYLIB_   = (0x1f | LC_REQ_DYLD_));   /* load and re-export dylib */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_LAZY_LOAD_DYLIB_  = 0x20);                    /* delay load of dylib until first use */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_ENCRYPTION_INFO_  = 0x21);                    /* encrypted segment information */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYLD_INFO_        = 0x22);                    /* compressed dyld information */
    BOOST_STATIC_CONSTANT(boost::uint32_t, LC_DYLD_INFO_ONLY_   = (0x22|LC_REQ_DYLD_));     /* compressed dyld information only */
};
102 
// Segment load command. In the file it is immediately followed by `nsects`
// section_template records, which is how macho_info enumerates section names.
// `AddressOffsetT` is boost::uint32_t for the 32-bit layout and boost::uint64_t for the 64-bit one.
template <class AddressOffsetT>
struct segment_command_template {
    boost::uint32_t     cmd;            /* LC_SEGMENT_ (32-bit layout) or LC_SEGMENT_64_ (64-bit layout) */
    boost::uint32_t     cmdsize;        /* includes sizeof section structs */
    char                segname[16];    /* segment name */
    AddressOffsetT      vmaddr;         /* memory address of this segment */
    AddressOffsetT      vmsize;         /* memory size of this segment */
    AddressOffsetT      fileoff;        /* file offset of this segment */
    AddressOffsetT      filesize;       /* amount to map from the file */
    vm_prot_t           maxprot;        /* maximum VM protection */
    vm_prot_t           initprot;       /* initial VM protection */
    boost::uint32_t     nsects;         /* number of sections in segment */
    boost::uint32_t     flags;          /* flags */
};

typedef segment_command_template<boost::uint32_t> segment_command_32_;
typedef segment_command_template<boost::uint64_t> segment_command_64_;
120 
// Description of a single section; records of this type follow a segment command.
// `AddressOffsetT` is boost::uint32_t for the 32-bit layout and boost::uint64_t for the 64-bit one.
template <class AddressOffsetT>
struct section_template {
    // NOTE: `sectname` is not guaranteed to be '\0'-terminated; a name may occupy all 16 bytes.
    // `segname` goes right after it in memory.
    char                sectname[16];   /* name of this section */
    char                segname[16];    /* segment this section goes in */
    AddressOffsetT      addr;           /* memory address of this section */
    AddressOffsetT      size;           /* size in bytes of this section */
    boost::uint32_t     offset;         /* file offset of this section */
    boost::uint32_t     align;          /* section alignment (power of 2) */
    boost::uint32_t     reloff;         /* file offset of relocation entries */
    boost::uint32_t     nreloc;         /* number of relocation entries */
    boost::uint32_t     flags;          /* flags (section type and attributes)*/
    // Two reserved words in the 32-bit layout, three in the 64-bit layout.
    boost::uint32_t     reserved[1 + sizeof(AddressOffsetT) / sizeof(uint32_t)];
};

typedef section_template<boost::uint32_t> section_32_;
typedef section_template<boost::uint64_t> section_64_;
137 
// Symbol table load command: tells where the array of nlist_template records
// and the string table with the symbol names are located in the file.
struct symtab_command_ {
    boost::uint32_t    cmd;        /* LC_SYMTAB_ */
    boost::uint32_t    cmdsize;    /* sizeof(struct symtab_command) */
    boost::uint32_t    symoff;     /* symbol table offset */
    boost::uint32_t    nsyms;      /* number of symbol table entries */
    boost::uint32_t    stroff;     /* string table offset */
    boost::uint32_t    strsize;    /* string table size in bytes */
};
146 
// One entry of the symbol table referenced by symtab_command_.
// `AddressOffsetT` is boost::uint32_t for the 32-bit layout and boost::uint64_t for the 64-bit one.
template <class AddressOffsetT>
struct nlist_template {
    boost::uint32_t     n_strx;     // offset of the name inside the string table; 0 means the symbol has no name
    boost::uint8_t      n_type;     // macho_info treats (n_type & 0x0e) == 0xe as "symbol is defined in a section"
    boost::uint8_t      n_sect;     // 1-based index of the section the symbol belongs to; 0 means no section
    boost::uint16_t     n_desc;
    AddressOffsetT      n_value;
};

typedef nlist_template<boost::uint32_t> nlist_32_;
typedef nlist_template<boost::uint64_t> nlist_64_;
158 
159 template <class AddressOffsetT>
160 class macho_info {
161     typedef boost::dll::detail::mach_header_template<AddressOffsetT>        header_t;
162     typedef boost::dll::detail::load_command_                               load_command_t;
163     typedef boost::dll::detail::segment_command_template<AddressOffsetT>    segment_t;
164     typedef boost::dll::detail::section_template<AddressOffsetT>            section_t;
165     typedef boost::dll::detail::symtab_command_                             symbol_header_t;
166     typedef boost::dll::detail::nlist_template<AddressOffsetT>              nlist_t;
167 
168     BOOST_STATIC_CONSTANT(boost::uint32_t, SEGMENT_CMD_NUMBER = (sizeof(AddressOffsetT) > 4 ? load_command_types::LC_SEGMENT_64_ : load_command_types::LC_SEGMENT_));
169 
170 public:
parsing_supported(std::ifstream & fs)171     static bool parsing_supported(std::ifstream& fs) {
172         static const uint32_t magic_bytes = (sizeof(AddressOffsetT) <= sizeof(uint32_t) ? 0xfeedface : 0xfeedfacf);
173 
174         uint32_t magic;
175         fs.seekg(0);
176         fs.read(reinterpret_cast<char*>(&magic), sizeof(magic));
177         return (magic_bytes == magic);
178     }
179 
180 private:
181     template <class T>
read_raw(std::ifstream & fs,T & value,std::size_t size=sizeof (T))182     static void read_raw(std::ifstream& fs, T& value, std::size_t size = sizeof(T)) {
183         fs.read(reinterpret_cast<char*>(&value), size);
184     }
185 
186     template <class F>
command_finder(std::ifstream & fs,uint32_t cmd_num,F callback_f)187     static void command_finder(std::ifstream& fs, uint32_t cmd_num, F callback_f) {
188         const header_t h = header(fs);
189         load_command_t command;
190         fs.seekg(sizeof(header_t));
191         for (std::size_t i = 0; i < h.ncmds; ++i) {
192             const std::ifstream::pos_type pos = fs.tellg();
193             read_raw(fs, command);
194             if (command.cmd != cmd_num) {
195                 fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
196                 continue;
197             }
198 
199             fs.seekg(pos);
200             callback_f(fs);
201             fs.seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
202         }
203     }
204 
205     struct section_names_gather {
206         std::vector<std::string>&       ret;
207 
operator ()boost::dll::detail::macho_info::section_names_gather208         void operator()(std::ifstream& fs) const {
209             segment_t segment;
210             read_raw(fs, segment);
211 
212             section_t section;
213             ret.reserve(ret.size() + segment.nsects);
214             for (std::size_t j = 0; j < segment.nsects; ++j) {
215                 read_raw(fs, section);
216                 // `segname` goes right after the `sectname`.
217                 // Forcing `sectname` to end on '\0'
218                 section.segname[0] = '\0';
219                 ret.push_back(section.sectname);
220                 if (ret.back().empty()) {
221                     ret.pop_back(); // Do not show empty names
222                 }
223             }
224         }
225     };
226 
227     struct symbol_names_gather {
228         std::vector<std::string>&       ret;
229         std::size_t                     section_index;
230 
operator ()boost::dll::detail::macho_info::symbol_names_gather231         void operator()(std::ifstream& fs) const {
232             symbol_header_t symbh;
233             read_raw(fs, symbh);
234             ret.reserve(ret.size() + symbh.nsyms);
235 
236             nlist_t symbol;
237             std::string symbol_name;
238             for (std::size_t j = 0; j < symbh.nsyms; ++j) {
239                 fs.seekg(symbh.symoff + j * sizeof(nlist_t));
240                 read_raw(fs, symbol);
241                 if (!symbol.n_strx) {
242                     continue; // Symbol has no name
243                 }
244 
245                 if ((symbol.n_type & 0x0e) != 0xe || !symbol.n_sect) {
246                     continue; // Symbol has no section
247                 }
248 
249                 if (section_index && section_index != symbol.n_sect) {
250                     continue; // Not in the required section
251                 }
252 
253                 fs.seekg(symbh.stroff + symbol.n_strx);
254                 std::getline(fs, symbol_name, '\0');
255                 if (symbol_name.empty()) {
256                     continue;
257                 }
258 
259                 if (symbol_name[0] == '_') {
260                     // Linker adds additional '_' symbol. Could not find official docs for that case.
261                     ret.push_back(symbol_name.c_str() + 1);
262                 } else {
263                     ret.push_back(symbol_name);
264                 }
265             }
266         }
267     };
268 
269 public:
sections(std::ifstream & fs)270     static std::vector<std::string> sections(std::ifstream& fs) {
271         std::vector<std::string> ret;
272         section_names_gather f = { ret };
273         command_finder(fs, SEGMENT_CMD_NUMBER, f);
274         return ret;
275     }
276 
277 private:
header(std::ifstream & fs)278     static header_t header(std::ifstream& fs) {
279         header_t h;
280 
281         fs.seekg(0);
282         read_raw(fs, h);
283 
284         return h;
285     }
286 
287 public:
symbols(std::ifstream & fs)288     static std::vector<std::string> symbols(std::ifstream& fs) {
289         std::vector<std::string> ret;
290         symbol_names_gather f = { ret, 0 };
291         command_finder(fs, load_command_types::LC_SYMTAB_, f);
292         return ret;
293     }
294 
symbols(std::ifstream & fs,const char * section_name)295     static std::vector<std::string> symbols(std::ifstream& fs, const char* section_name) {
296         // Not very optimal solution
297         std::vector<std::string> ret = sections(fs);
298         std::vector<std::string>::iterator it = std::find(ret.begin(), ret.end(), section_name);
299         if (it == ret.end()) {
300             // No section with such name
301             ret.clear();
302             return ret;
303         }
304 
305         // section indexes start from 1
306         symbol_names_gather f = { ret, static_cast<std::size_t>(1 + (it - ret.begin())) };
307         ret.clear();
308         command_finder(fs, load_command_types::LC_SYMTAB_, f);
309         return ret;
310     }
311 };
312 
313 typedef macho_info<boost::uint32_t> macho_info32;
314 typedef macho_info<boost::uint64_t> macho_info64;
315 
316 }}} // namespace boost::dll::detail
317 
318 #endif // BOOST_DLL_DETAIL_MACHO_INFO_HPP
319