1 /*!
2  * \file   LibraryInformation-elf_info.cxx
3  * \brief
4  * \author Thomas Helfer
5  * \date   28/03/2017
6  * \copyright Copyright (C) 2006-2018 CEA/DEN, EDF R&D. All rights
7  * reserved.
8  * This project is publicly released under either the GNU GPL Licence
9  * or the CECILL-A licence. A copy of thoses licences are delivered
10  * with the sources of TFEL. CEA or EDF may also distribute this
11  * project under specific licensing conditions.
12  *
 * \note This file contains code that has been extracted from the
 * boost/dll library version 1.63 and has been originally written by
 * Antony Polukhin and Renato Tegon Forti.
16  */
17 
18 namespace details {
19 
20   typedef int integer_t;
21   typedef int vm_prot_t;
22   typedef integer_t cpu_type_t;
23   typedef integer_t cpu_subtype_t;
24 
25   template <class AddressOffsetT>
26   struct mach_header_template {
27     std::uint32_t     magic;
28     cpu_type_t          cputype;
29     cpu_subtype_t       cpusubtype;
30     std::uint32_t     filetype;
31     std::uint32_t     ncmds;
32     std::uint32_t     sizeofcmds;
33     std::uint32_t     flags[sizeof(AddressOffsetT) / sizeof(uint32_t)]; // Flags and reserved
34   };
35 
36   typedef mach_header_template<std::uint32_t> mach_header_32_;
37   typedef mach_header_template<std::uint64_t> mach_header_64_;
38 
39   struct load_command_ {
40     std::uint32_t        cmd;        /* type of command */
41     std::uint32_t        cmdsize;
42   };
43 
44   struct load_command_types {
45     static constexpr const std::uint32_t LC_SEGMENT_          = 0x1;   /* segment of this file to be mapped */
46     static constexpr const std::uint32_t LC_SYMTAB_           = 0x2;   /* link-edit stab symbol table info */
47     static constexpr const std::uint32_t LC_SYMSEG_           = 0x3;   /* link-edit gdb symbol table info (obsolete) */
48     static constexpr const std::uint32_t LC_THREAD_           = 0x4;   /* thread */
49     static constexpr const std::uint32_t LC_UNIXTHREAD_       = 0x5;   /* unix thread (includes a stack) */
50     static constexpr const std::uint32_t LC_LOADFVMLIB_       = 0x6;   /* load a specified fixed VM shared library */
51     static constexpr const std::uint32_t LC_IDFVMLIB_         = 0x7;   /* fixed VM shared library identification */
52     static constexpr const std::uint32_t LC_IDENT_            = 0x8;   /* object identification info (obsolete) */
53     static constexpr const std::uint32_t LC_FVMFILE_          = 0x9;   /* fixed VM file inclusion (internal use) */
54     static constexpr const std::uint32_t LC_PREPAGE_          = 0xa;   /* prepage command (internal use) */
55     static constexpr const std::uint32_t LC_DYSYMTAB_         = 0xb;   /* dynamic link-edit symbol table info */
56     static constexpr const std::uint32_t LC_LOAD_DYLIB_       = 0xc;   /* load a dynamically linked shared library */
57     static constexpr const std::uint32_t LC_ID_DYLIB_         = 0xd;   /* dynamically linked shared lib ident */
58     static constexpr const std::uint32_t LC_LOAD_DYLINKER_    = 0xe;   /* load a dynamic linker */
59     static constexpr const std::uint32_t LC_ID_DYLINKER_      = 0xf;   /* dynamic linker identification */
60     static constexpr const std::uint32_t LC_PREBOUND_DYLIB_   = 0x10;  /* modules prebound for a dynamically linked shared library */
61     static constexpr const std::uint32_t LC_ROUTINES_         = 0x11;  /* image routines */
62     static constexpr const std::uint32_t LC_SUB_FRAMEWORK_    = 0x12;  /* sub framework */
63     static constexpr const std::uint32_t LC_SUB_UMBRELLA_     = 0x13;  /* sub umbrella */
64     static constexpr const std::uint32_t LC_SUB_CLIENT_       = 0x14;  /* sub client */
65     static constexpr const std::uint32_t LC_SUB_LIBRARY_      = 0x15;  /* sub library */
66     static constexpr const std::uint32_t LC_TWOLEVEL_HINTS_   = 0x16;  /* two-level namespace lookup hints */
67     static constexpr const std::uint32_t LC_PREBIND_CKSUM_    = 0x17;  /* prebind checksum */
68     /*
69      * After MacOS X 10.1 when a new load command is added that is required to be
70      * understood by the dynamic linker for the image to execute properly the
71      * LC_REQ_DYLD bit will be or'ed into the load command constant.  If the dynamic
72      * linker sees such a load command it it does not understand will issue a
73      * "unknown load command required for execution" error and refuse to use the
74      * image.  Other load commands without this bit that are not understood will
75      * simply be ignored.
76      */
77     static constexpr const std::uint32_t LC_REQ_DYLD_         = 0x80000000;
78 
79     /*
80      * load a dynamically linked shared library that is allowed to be missing
81      * (all symbols are weak imported).
82      */
83     static constexpr const std::uint32_t LC_LOAD_WEAK_DYLIB_  = (0x18 | LC_REQ_DYLD_);
84 
85     static constexpr const std::uint32_t LC_SEGMENT_64_       = 0x19;                    /* 64-bit segment of this file to be mapped */
86     static constexpr const std::uint32_t LC_ROUTINES_64_      = 0x1a;                    /* 64-bit image routines */
87     static constexpr const std::uint32_t LC_UUID_             = 0x1b;                    /* the uuid */
88     static constexpr const std::uint32_t LC_RPATH_            = (0x1c | LC_REQ_DYLD_);   /* runpath additions */
89     static constexpr const std::uint32_t LC_CODE_SIGNATURE_   = 0x1d;                    /* local of code signature */
90     static constexpr const std::uint32_t LC_SEGMENT_SPLIT_INFO_= 0x1e;                   /* local of info to split segments */
91     static constexpr const std::uint32_t LC_REEXPORT_DYLIB_   = (0x1f | LC_REQ_DYLD_);   /* load and re-export dylib */
92     static constexpr const std::uint32_t LC_LAZY_LOAD_DYLIB_  = 0x20;                    /* delay load of dylib until first use */
93     static constexpr const std::uint32_t LC_ENCRYPTION_INFO_  = 0x21;                    /* encrypted segment information */
94     static constexpr const std::uint32_t LC_DYLD_INFO_        = 0x22;                    /* compressed dyld information */
95     static constexpr const std::uint32_t LC_DYLD_INFO_ONLY_   = (0x22|LC_REQ_DYLD_);     /* compressed dyld information only */
96   };
97 
98   template <class AddressOffsetT>
99   struct segment_command_template {
100     std::uint32_t     cmd;            /* LC_SEGMENT_ */
101     std::uint32_t     cmdsize;        /* includes sizeof section structs */
102     char                segname[16];    /* segment name */
103     AddressOffsetT      vmaddr;         /* memory address of this segment */
104     AddressOffsetT      vmsize;         /* memory size of this segment */
105     AddressOffsetT      fileoff;        /* file offset of this segment */
106     AddressOffsetT      filesize;       /* amount to map from the file */
107     vm_prot_t           maxprot;        /* maximum VM protection */
108     vm_prot_t           initprot;       /* initial VM protection */
109     std::uint32_t     nsects;         /* number of sections in segment */
110     std::uint32_t     flags;          /* flags */
111   };
112 
113   typedef segment_command_template<std::uint32_t> segment_command_32_;
114   typedef segment_command_template<std::uint64_t> segment_command_64_;
115 
116   template <class AddressOffsetT>
117   struct section_template {
118     char                sectname[16];   /* name of this section */
119     char                segname[16];    /* segment this section goes in */
120     AddressOffsetT      addr;           /* memory address of this section */
121     AddressOffsetT      size;           /* size in bytes of this section */
122     std::uint32_t     offset;         /* file offset of this section */
123     std::uint32_t     align;          /* section alignment (power of 2) */
124     std::uint32_t     reloff;         /* file offset of relocation entries */
125     std::uint32_t     nreloc;         /* number of relocation entries */
126     std::uint32_t     flags;          /* flags (section type and attributes)*/
127     std::uint32_t     reserved[1 + sizeof(AddressOffsetT) / sizeof(uint32_t)];
128   };
129 
130   typedef section_template<std::uint32_t> section_32_;
131   typedef section_template<std::uint64_t> section_64_;
132 
133   struct symtab_command_ {
134     std::uint32_t    cmd;        /* LC_SYMTAB_ */
135     std::uint32_t    cmdsize;    /* sizeof(struct symtab_command) */
136     std::uint32_t    symoff;     /* symbol table offset */
137     std::uint32_t    nsyms;      /* number of symbol table entries */
138     std::uint32_t    stroff;     /* string table offset */
139     std::uint32_t    strsize;    /* string table size in bytes */
140   };
141 
142   template <class AddressOffsetT>
143   struct nlist_template {
144     std::uint32_t     n_strx;
145     std::uint8_t      n_type;
146     std::uint8_t      n_sect;
147     std::uint16_t     n_desc;
148     AddressOffsetT      n_value;
149   };
150 
151   typedef nlist_template<std::uint32_t> nlist_32_;
152   typedef nlist_template<std::uint64_t> nlist_64_;
153 
154   template <class AddressOffsetT>
155   class macho_info
156     : public tfel::system::LibraryInformation::Implementation
157   {
158     std::shared_ptr<std::ifstream> f_;
159 
160     typedef details::mach_header_template<AddressOffsetT>        header_t;
161     typedef details::load_command_                               load_command_t;
162     typedef details::segment_command_template<AddressOffsetT>    segment_t;
163     typedef details::section_template<AddressOffsetT>            section_t;
164     typedef details::symtab_command_                             symbol_header_t;
165     typedef details::nlist_template<AddressOffsetT>              nlist_t;
166 
167     static constexpr const std::uint32_t SEGMENT_CMD_NUMBER = (sizeof(AddressOffsetT) > 4 ?
168 							       load_command_types::LC_SEGMENT_64_ :
169 							       load_command_types::LC_SEGMENT_);
170 
171   public:
parsing_supported(std::ifstream & f)172     static bool parsing_supported(std::ifstream& f) {
173       static constexpr const uint32_t magic_bytes = (sizeof(AddressOffsetT) <= sizeof(uint32_t) ?
174 						     0xfeedface : 0xfeedfacf);
175 
176       uint32_t magic;
177       f.seekg(0);
178       f.read(reinterpret_cast<char*>(&magic), sizeof(magic));
179       return (magic_bytes == magic);
180     }
181 
macho_info(std::shared_ptr<std::ifstream> f)182     explicit macho_info(std::shared_ptr<std::ifstream> f) noexcept
183       : f_(f)
184     {}
185 
186   private:
187     template <class T>
read_raw(T & value,std::size_t size=sizeof (T)) const188     inline void read_raw(T& value, std::size_t size = sizeof(T)) const {
189       this->f_->read(reinterpret_cast<char*>(&value),
190 		     static_cast<std::streamsize>(size));
191     }
192 
193     template <class F>
command_finder(uint32_t cmd_num,F callback_f)194     void command_finder(uint32_t cmd_num, F callback_f) {
195       const header_t h = header();
196       load_command_t command;
197       this->f_->seekg(sizeof(header_t));
198       for (std::size_t i = 0; i < h.ncmds; ++i) {
199 	const std::ifstream::pos_type pos = this->f_->tellg();
200 	read_raw(command);
201 	if (command.cmd != cmd_num) {
202 	  this->f_->seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
203 	  continue;
204 	}
205 
206 	this->f_->seekg(pos);
207 	callback_f(*this);
208 	this->f_->seekg(pos + static_cast<std::ifstream::pos_type>(command.cmdsize));
209       }
210     }
211 
212     struct section_names_gather {
213       std::vector<std::string>&       ret;
214 
operator ()details::macho_info::section_names_gather215       void operator()(const macho_info& f) const {
216 	segment_t segment;
217 	f.read_raw(segment);
218 
219 	section_t section;
220 	ret.reserve(ret.size() + segment.nsects);
221 	for (std::size_t j = 0; j < segment.nsects; ++j) {
222 	  f.read_raw(section);
223 	  // `segname` goes right after the `sectname`.
224 	  // Forcing `sectname` to end on '\0'
225 	  section.segname[0] = '\0';
226 	  ret.push_back(section.sectname);
227 	  if (ret.back().empty()) {
228 	    ret.pop_back(); // Do not show empty names
229 	  }
230 	}
231       }
232     };
233 
234     struct symbol_names_gather {
235       std::vector<std::string>&       ret;
236       std::size_t                     section_index;
237 
operator ()details::macho_info::symbol_names_gather238       void operator()(const macho_info& f) const {
239 	symbol_header_t symbh;
240 	f.read_raw(symbh);
241 	ret.reserve(ret.size() + symbh.nsyms);
242 
243 	nlist_t symbol;
244 	std::string symbol_name;
245 	for (std::size_t j = 0; j < symbh.nsyms; ++j) {
246 	  f.f_->seekg(static_cast<std::streamoff>(symbh.symoff + j * sizeof(nlist_t)));
247 	  f.read_raw(symbol);
248 	  if (!symbol.n_strx) {
249 	    continue; // Symbol has no name
250 	  }
251 
252 	  if ((symbol.n_type & 0x0e) != 0xe || !symbol.n_sect) {
253 	    continue; // Symbol has no section
254 	  }
255 
256 	  if (section_index && section_index != symbol.n_sect) {
257 	    continue; // Not in the required section
258 	  }
259 
260 	  f.f_->seekg(symbh.stroff + symbol.n_strx);
261 	  std::getline(*(f.f_), symbol_name, '\0');
262 	  if (symbol_name.empty()) {
263 	    continue;
264 	  }
265 
266 	  if (symbol_name[0] == '_') {
267 	    // Linker adds additional '_' symbol. Could not find official docs for that case.
268 	    ret.push_back(symbol_name.c_str() + 1);
269 	  } else {
270 	    ret.push_back(symbol_name);
271 	  }
272 	}
273       }
274     };
275 
276   public:
sections()277     std::vector<std::string> sections() override{
278       std::vector<std::string> ret;
279       section_names_gather f = { ret };
280       command_finder(SEGMENT_CMD_NUMBER, f);
281       return ret;
282     }
283 
284   private:
header()285     inline header_t header() {
286       header_t h;
287 
288       this->f_->seekg(0);
289       read_raw(h);
290 
291       return h;
292     }
293 
294   public:
symbols()295     std::vector<std::string> symbols() override{
296       std::vector<std::string> ret;
297       symbol_names_gather f = { ret, 0 };
298       command_finder(load_command_types::LC_SYMTAB_, f);
299       return ret;
300     }
301 
symbols(const char * section_name)302     std::vector<std::string> symbols(const char* section_name) override{
303       // Not very optimal solution
304       std::vector<std::string> ret = sections();
305       std::vector<std::string>::iterator it = std::find(ret.begin(), ret.end(), section_name);
306       if (it == ret.end()) {
307 	// No section with such name
308 	ret.clear();
309 	return ret;
310       }
311 
312       // section indexes start from 1
313       symbol_names_gather f = { ret, static_cast<std::size_t>(1 + (it - ret.begin())) };
314       ret.clear();
315       command_finder(load_command_types::LC_SYMTAB_, f);
316       return ret;
317     }
318   };
319 
320   typedef macho_info<std::uint32_t> macho_info32;
321   typedef macho_info<std::uint64_t> macho_info64;
322 
} // namespace details
324