1 /*
2  * -*- coding: utf-8; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:filetype=c:et:sw=4:ts=4:sts=4:tw=100
3  * libmachista.c
4  * $Id: libmachista.c 120067 2014-05-14 22:18:53Z cal@macports.org $
5  *
6  * Copyright (c) 2011 The MacPorts Project
7  * Copyright (c) 2011 Landon Fuller <landonf@macports.org>
8  * Copyright (c) 2011 Clemens Lang <cal@macports.org>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifdef HAVE_CONFIG_H
34 #include <pkg_config.h>
35 #endif
36 
37 /* required for asprintf(3) on OS X */
38 #define _DARWIN_C_SOURCE
39 /* required for asprintf(3) on Linux */
40 #define _GNU_SOURCE
41 
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 
46 #include <fcntl.h>
47 #include <sys/mman.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50 
51 #include <err.h>
52 #include <string.h>
53 #include <strings.h>
54 
55 #ifdef __MACH__
56 #include <mach-o/fat.h>
57 #include <mach-o/loader.h>
58 
59 #include <libkern/OSAtomic.h>
60 #endif
61 
62 #include "libmachista.h"
63 #include "hashmap.h"
64 
65 #ifdef __MACH__
66 /* Tiger compatibility */
67 #ifndef LC_RPATH
68 #define LC_RPATH       (0x1c | LC_REQ_DYLD)    /* runpath additions */
69 /*
70  * The rpath_command contains a path which at runtime should be added to
71  * the current run path used to find @rpath prefixed dylibs.
72  */
73 struct rpath_command {
74     uint32_t     cmd;       /* LC_RPATH */
75     uint32_t     cmdsize;   /* includes string */
76     union lc_str path;      /* path to add to run path */
77 };
78 #endif
79 #ifndef LC_REEXPORT_DYLIB
80 #define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
81 #endif
82 #endif /* __MACH__ */
83 
/* A read-only view of the bytes being parsed: start address plus size in bytes. */
struct macho_input {
    const void *data;   /* first byte of the input */
    size_t length;      /* number of bytes available starting at data */
};
typedef struct macho_input macho_input_t;
88 
89 /* This is macho_handle_t. The corresponding typedef is in the header */
90 struct macho_handle {
91     HashMap *result_map;
92 };
93 
94 #ifdef __MACH__
95 /* Verify that the given range is within bounds. */
macho_read(macho_input_t * input,const void * address,size_t length)96 static const void *macho_read (macho_input_t *input, const void *address, size_t length) {
97     if ((((uint8_t *) address) - ((uint8_t *) input->data)) + length > input->length) {
98        // warnx("Short read parsing Mach-O input");
99         return NULL;
100     }
101 
102     return address;
103 }
104 
105 /* Verify that address + offset + length is within bounds. */
macho_offset(macho_input_t * input,const void * address,size_t offset,size_t length)106 static const void *macho_offset (macho_input_t *input, const void *address, size_t offset, size_t length) {
107     void *result = ((uint8_t *) address) + offset;
108     return macho_read(input, result, length);
109 }
110 #endif
111 
/* Return a human readable "major.minor.patch" rendering of a packed dylib
 * version number: major is taken from the upper 16 bits, minor from bits 8-15
 * and patch from the lowest 8 bits.
 *
 * The result is heap-allocated and must be free()'d by the caller. NULL is
 * returned if formatting or memory allocation fails. */
char *macho_format_dylib_version (uint32_t version) {
    const uint32_t major = (version >> 16) & 0xFFFF;
    const uint32_t minor = (version >> 8) & 0xFF;
    const uint32_t patch = version & 0xFF;

    /* First pass only measures the formatted length */
    int needed = snprintf(NULL, 0, "%"PRIu32".%"PRIu32".%"PRIu32, major, minor, patch);
    if (needed < 0)
        return NULL;

    char *result = malloc((size_t) needed + 1);
    if (result == NULL)
        return NULL;

    snprintf(result, (size_t) needed + 1, "%"PRIu32".%"PRIu32".%"PRIu32, major, minor, patch);
    return result;
}
118 
119 #ifdef __MACH__
macho_get_arch_name(cpu_type_t cputype)120 const char *macho_get_arch_name (cpu_type_t cputype) {
121     const NXArchInfo *archInfo = NXGetArchInfoFromCpuType(cputype, CPU_SUBTYPE_MULTIPLE);
122     if (!archInfo) {
123         return NULL;
124     }
125     return archInfo->name;
126 #else
127 const char *macho_get_arch_name (cpu_type_t cputype UNUSED) {
128     return NULL;
129 #endif
130 }
131 
132 #ifdef __MACH__
/* Byteswap wrappers, used through a function pointer so the parser can pick
 * the right one once per file. */

/* Return input with its byte order reversed (via OSSwapInt32). */
static uint32_t macho_swap32 (uint32_t input) {
    const uint32_t swapped = OSSwapInt32(input);
    return swapped;
}
137 
/* No-op counterpart of the swapping wrapper: hands the value back untouched. */
static uint32_t macho_nswap32(uint32_t input) {
    const uint32_t unchanged = input;
    return unchanged;
}
141 
142 /* Creates a new macho_t.
143  * Returns NULL on failure or a pointer to a 0-initialized macho_t on success */
144 static macho_t *create_macho_t (void) {
145     macho_t *mt = malloc(sizeof(macho_t));
146     if (mt == NULL)
147         return NULL;
148 
149     memset(mt, 0, sizeof(macho_t));
150     return mt;
151 }
152 
153 /* Creates a new macho_arch_t.
154  * Returns NULL on failure or a pointer to a 0-initialized macho_arch_t on success */
155 static macho_arch_t *create_macho_arch_t (void) {
156     macho_arch_t *mat = malloc(sizeof(macho_arch_t));
157     if (mat == NULL)
158         return NULL;
159 
160     memset(mat, 0, sizeof(macho_arch_t));
161     return mat;
162 }
163 
164 /* Creates a new macho_loadcmd_t.
165  * Returns NULL on failure or a pointer to a 0-initialized macho_loadcmd_t on success */
166 static macho_loadcmd_t *create_macho_loadcmd_t (void) {
167     macho_loadcmd_t *mlt = malloc(sizeof(macho_loadcmd_t));
168     if (mlt == NULL)
169         return NULL;
170 
171     memset(mlt, 0, sizeof(macho_loadcmd_t));
172     return mlt;
173 }
174 #endif
175 
176 /* Frees a previously allocated macho_loadcmd_t and all it's associated resources */
177 static void free_macho_loadcmd_t (macho_loadcmd_t *mlt) {
178     if (mlt == NULL)
179         return;
180 
181     free(mlt->mlt_install_name);
182     free(mlt);
183 }
184 
185 /* Frees a previously allocated macho_arch_t and all it's associated resources */
186 static void free_macho_arch_t (macho_arch_t *mat) {
187     if (mat == NULL)
188         return;
189 
190     macho_loadcmd_t *current = mat->mat_loadcmds;
191     while (current != NULL) {
192         macho_loadcmd_t *freeme = current;
193         current = current->next;
194         free_macho_loadcmd_t(freeme);
195     }
196 
197     free(mat->mat_install_name);
198     free(mat->mat_rpath);
199     free(mat);
200 }
201 
202 /* Frees a previously allocated macho_t and all it's associated resources */
203 static void free_macho_t (macho_t *mt) {
204     if (mt == NULL)
205         return;
206 
207     macho_arch_t *current = mt->mt_archs;
208     while (current != NULL) {
209         macho_arch_t *freeme = current;
210         current = current->next;
211         free_macho_arch_t(freeme);
212     }
213 
214     free(mt);
215 }
216 
217 #ifdef __MACH__
218 /* Creates a new element in the architecture list of a macho_t (mt_archs), increases the counter of
219  * architectures (mt_arch_count) and returns a pointer to the newly allocated element or NULL on
220  * error */
221 static macho_arch_t *macho_archlist_append (macho_t *mt) {
222     macho_arch_t *old_head = mt->mt_archs;
223 
224     macho_arch_t *new_head = create_macho_arch_t();
225     if (new_head == NULL)
226         return NULL;
227     new_head->next = old_head;
228     mt->mt_archs = new_head;
229 
230     return mt->mt_archs;
231 }
232 
233 /* Creates a new element in the load command list of a macho_arch_t (mat_loadcmds), increases the
234  * counter of load commands (mat_loadcmd_count) and returns a pointer to the newly allocated element
235  * or NULL on error */
236 static macho_loadcmd_t *macho_loadcmdlist_append (macho_arch_t *mat) {
237     macho_loadcmd_t *old_head = mat->mat_loadcmds;
238 
239     macho_loadcmd_t *new_head = create_macho_loadcmd_t();
240     if (new_head == NULL)
241         return NULL;
242     new_head->next = old_head;
243     mat->mat_loadcmds = new_head;
244 
245     return mat->mat_loadcmds;
246 }
247 #endif
248 
/* Parse a Mach-O header */
#ifdef __MACH__
/* Copies the string stored in the first maxlen bytes at src into a newly
 * allocated, NUL-terminated buffer. Copying stops at the first NUL byte; if
 * there is none, all maxlen bytes are copied. Never reads beyond src + maxlen.
 * Returns NULL if the allocation fails; the caller owns the result. */
static char *macho_copy_string (const void *src, size_t maxlen) {
    const char *terminator = memchr(src, '\0', maxlen);
    size_t len = (terminator != NULL) ? (size_t) (terminator - (const char *) src) : maxlen;

    char *copy = malloc(len + 1);
    if (copy == NULL)
        return NULL;

    memcpy(copy, src, len);
    copy[len] = '\0';
    return copy;
}

/* Parses the Mach-O data described by input and records what it finds in mt.
 *
 * For a thin (single architecture) file one macho_arch_t is added to mt and
 * filled with the CPU type/subtype, the rpath (LC_RPATH), the library's own
 * install name and versions (LC_ID_DYLIB) and one macho_loadcmd_t per
 * LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB and LC_REEXPORT_DYLIB command. If LC_RPATH
 * or LC_ID_DYLIB occurs more than once, the last occurrence wins. For a
 * universal file every contained architecture is parsed in turn by calling
 * this function on the corresponding slice of the input.
 *
 * Returns MACHO_SUCCESS, or
 *   MACHO_ERANGE if a structure does not fit into the input,
 *   MACHO_EMEM   if memory could not be allocated,
 *   MACHO_EMAGIC if the magic number is unknown, or if a slice of a universal
 *                file is itself a universal file.
 * On failure mt may contain partially filled entries; the caller is expected
 * to release it. */
static int parse_macho (macho_t *mt, macho_input_t *input) {
    /* Read the file type. */
    const uint32_t *magic = macho_read(input, input->data, sizeof(uint32_t));
    if (magic == NULL)
        return MACHO_ERANGE;

    bool universal = false;
    uint32_t (*swap32)(uint32_t) = macho_nswap32;
    size_t header_size = 0;

    switch (*magic) {
        case MH_CIGAM:
            swap32 = macho_swap32;
            /* fall through */

        case MH_MAGIC:
            /* 32-bit Mach-O */
            header_size = sizeof(struct mach_header);
            break;

        case MH_CIGAM_64:
            swap32 = macho_swap32;
            /* fall through */

        case MH_MAGIC_64:
            /* 64-bit Mach-O */
            header_size = sizeof(struct mach_header_64);
            break;

        case FAT_CIGAM:
        case FAT_MAGIC:
            /* Universal binary */
            universal = true;
            break;

        default:
            /* Unknown binary type */
            return MACHO_EMAGIC;
    }

    /* Parse universal file. */
    if (universal) {
        /* The magic check above only proved that four bytes are readable */
        const struct fat_header *fat_header = macho_read(input, input->data, sizeof(*fat_header));
        if (fat_header == NULL)
            return MACHO_ERANGE;

        uint32_t nfat = OSSwapBigToHostInt32(fat_header->nfat_arch);
        const struct fat_arch *archs = macho_offset(input, fat_header, sizeof(struct fat_header), sizeof(struct fat_arch));
        if (archs == NULL)
            return MACHO_ERANGE;

        for (uint32_t i = 0; i < nfat; i++) { /* foreach architecture */
            const struct fat_arch *arch = macho_offset(input, archs, (size_t) i * sizeof(struct fat_arch), sizeof(struct fat_arch));
            if (arch == NULL)
                return MACHO_ERANGE;

            /* Fetch a pointer to the architecture's Mach-O header. */
            macho_input_t arch_input;
            arch_input.length = OSSwapBigToHostInt32(arch->size);
            arch_input.data = macho_offset(input, input->data, OSSwapBigToHostInt32(arch->offset), arch_input.length);
            if (arch_input.data == NULL)
                return MACHO_ERANGE;

            /* Refuse slices that are universal files themselves: a slice may
             * cover the whole input (offset 0), which would otherwise make the
             * recursion below never terminate. */
            const uint32_t *arch_magic = macho_read(&arch_input, arch_input.data, sizeof(uint32_t));
            if (arch_magic == NULL)
                return MACHO_ERANGE;
            if (*arch_magic == FAT_MAGIC || *arch_magic == FAT_CIGAM)
                return MACHO_EMAGIC;

            /* Parse the architecture's Mach-O header */
            int res = parse_macho(mt, &arch_input);
            if (res != MACHO_SUCCESS)
                return res;
        }

        return MACHO_SUCCESS;
    }

    /* The 64-bit header is a direct superset of the 32-bit header, so the
     * common fields can be read through the 32-bit layout in both cases. */
    const struct mach_header *header = macho_read(input, input->data, header_size);
    if (header == NULL)
        return MACHO_ERANGE;

    macho_arch_t *mat = macho_archlist_append(mt);
    if (mat == NULL)
        return MACHO_EMEM;

    /* Copy the architecture */
    mat->mat_cputype = swap32(header->cputype);
    mat->mat_cpusubtype = swap32(header->cpusubtype);

    /* Parse the Mach-O load commands */
    uint32_t ncmds = swap32(header->ncmds);

    /* Setup to jump over the header on the first pass through instead of the previous command */
    const struct load_command *cmd = (const void *) header;
    uint32_t cmdsize = (uint32_t) header_size;

    /* Iterate over the load commands */
    for (uint32_t i = 0; i < ncmds; i++) {
        /* Load the next command */
        cmd = macho_offset(input, cmd, cmdsize, sizeof(struct load_command));
        if (cmd == NULL)
            return MACHO_ERANGE;

        /* A command can never be smaller than its own fixed header; without
         * this check a size of 0 would make the loop re-read the same command
         * up to ncmds times. */
        cmdsize = swap32(cmd->cmdsize);
        if (cmdsize < sizeof(struct load_command))
            return MACHO_ERANGE;

        /* Load the full command */
        cmd = macho_read(input, cmd, cmdsize);
        if (cmd == NULL)
            return MACHO_ERANGE;

        /* Handle known types */
        uint32_t cmd_type = swap32(cmd->cmd);
        switch (cmd_type) {
            case LC_RPATH: {
                /* Copy the rpath */
                if (cmdsize < sizeof(struct rpath_command))
                    return MACHO_ERANGE;

                size_t pathlen = cmdsize - sizeof(struct rpath_command);
                const void *pathptr = macho_offset(input, cmd, sizeof(struct rpath_command), pathlen);
                if (pathptr == NULL)
                    return MACHO_ERANGE;

                char *rpath = macho_copy_string(pathptr, pathlen);
                if (rpath == NULL)
                    return MACHO_EMEM;

                /* Drop the value of an earlier LC_RPATH, if any, instead of leaking it */
                free(mat->mat_rpath);
                mat->mat_rpath = rpath;
                break;
            }

            case LC_ID_DYLIB:
            case LC_LOAD_WEAK_DYLIB:
            case LC_REEXPORT_DYLIB:
            case LC_LOAD_DYLIB: {
                const struct dylib_command *dylib_cmd = (const struct dylib_command *) cmd;

                /* Extract the install name */
                if (cmdsize < sizeof(struct dylib_command))
                    return MACHO_ERANGE;

                size_t namelen = cmdsize - sizeof(struct dylib_command);
                const void *nameptr = macho_offset(input, cmd, sizeof(struct dylib_command), namelen);
                if (nameptr == NULL)
                    return MACHO_ERANGE;

                if (cmd_type == LC_ID_DYLIB) {
                    /* Copy install name */
                    char *install_name = macho_copy_string(nameptr, namelen);
                    if (install_name == NULL)
                        return MACHO_EMEM;

                    /* Drop the value of an earlier LC_ID_DYLIB, if any, instead of leaking it */
                    free(mat->mat_install_name);
                    mat->mat_install_name = install_name;

                    /* Copy version numbers (raw, for easier comparison) */
                    mat->mat_version = swap32(dylib_cmd->dylib.current_version);
                    mat->mat_comp_version = swap32(dylib_cmd->dylib.compatibility_version);
                } else {
                    /* Append loadcmd to list of loadcommands */
                    macho_loadcmd_t *mlt = macho_loadcmdlist_append(mat);
                    if (mlt == NULL)
                        return MACHO_EMEM;

                    /* Copy install name */
                    mlt->mlt_install_name = macho_copy_string(nameptr, namelen);
                    if (mlt->mlt_install_name == NULL)
                        return MACHO_EMEM;

                    /* Copy version numbers (raw, for easier comparison) */
                    mlt->mlt_version = swap32(dylib_cmd->dylib.current_version);
                    mlt->mlt_comp_version = swap32(dylib_cmd->dylib.compatibility_version);

                    /* Copy command type */
                    mlt->mlt_type = cmd_type;
                }
                break;
            }

            default:
                break;
        }
    }

    return MACHO_SUCCESS;
}
#endif
453 
454 /* Parse a (possible Mach-O) file. For a more detailed description, see the header */
455 #ifdef __MACH__
456 int macho_parse_file(macho_handle_t *handle, const char *filepath, const macho_t **res) {
457     int fd;
458     struct stat st;
459     void *data;
460     macho_input_t input_file;
461 
462     /* Check hashmap for precomputed results */
463     const macho_t *cached_res = hashMapGet(handle->result_map, filepath);
464     if (cached_res != NULL) {
465         *res = cached_res;
466         return MACHO_SUCCESS;
467     }
468 
469 
470     /* Open input file */
471     if ((fd = open(filepath, O_RDONLY)) < 0) {
472         return MACHO_EFILE;
473     }
474 
475     /* Get file length */
476     if (fstat(fd, &st) != 0) {
477         close(fd);
478         return MACHO_EFILE;
479     }
480 
481     /* Map file into address space */
482     if ((data = mmap(NULL, st.st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0)) == MAP_FAILED) {
483         close(fd);
484         return MACHO_EMMAP;
485     }
486 
487     /* Parse file */
488     input_file.data = data;
489     input_file.length = st.st_size;
490 
491     *res = create_macho_t();
492     if (*res == NULL)
493         return MACHO_EMEM;
494 
495     /* The output parameter *res should be read-only for the user of the lib only, but writable for
496      * us */
497     int ret = parse_macho((macho_t *)*res, &input_file);
498     if (ret == MACHO_SUCCESS) {
499         /* Insert into hashmap for caching */
500         if (0 == hashMapPut(handle->result_map, filepath, *res, NULL)) {
501             free_macho_t((macho_t *)*res);
502             *res = NULL;
503             ret = MACHO_EMEM;
504         }
505     } else {
506         /* An error occured, free mt */
507         free_macho_t((macho_t *)*res);
508         *res = NULL;
509     }
510 
511     /* Cleanup */
512     munmap(data, st.st_size);
513     close(fd);
514 
515     return ret;
516 #else
517 int macho_parse_file(macho_handle_t *handle UNUSED, const char *filepath UNUSED, const macho_t **res UNUSED) {
518     return 0;
519 #endif
520 }
521 
522 /* Create a new macho_handle_t. More information on this function is available in the header */
523 macho_handle_t *macho_create_handle (void) {
524     macho_handle_t *mht = malloc(sizeof(macho_handle_t));
525     if (mht == NULL)
526         return NULL;
527     mht->result_map = hashMapCreate((void (*)(const void *))free_macho_t);
528     if (mht->result_map == NULL) {
529         free(mht);
530         return NULL;
531     }
532     return mht;
533 }
534 
535 /* Release a macho_handle_t. For more documentation, see the header */
536 void macho_destroy_handle(macho_handle_t *handle) {
537     if (handle == NULL)
538         return;
539 
540     hashMapDestroy(handle->result_map);
541 
542     free(handle);
543 }
544 
/* Returns string representation of the MACHO_* error code constants.
 *
 * The codes are single-bit flags; the message is selected by the position of
 * the highest set bit of err (0 for "no bit set", i.e. success). Negative
 * values and values whose highest bit has no message yield "Unknown error"
 * instead of indexing past the end of the table. The returned string is
 * statically allocated and must not be modified or freed. */
const char *macho_strerror(int err) {
    static const char *const errors[] = {
        /* 0x00 */ "Success",
        /* 0x01 */ "Error opening or reading file",
        /* 0x02 */ "Error mapping file into memory",
        /* 0x04 */ "Error allocating memory",
        /* 0x08 */ "Premature end of data, possibly corrupt file",
        /* 0x10 */ "Not a Mach-O file",
    };
    static const char unknown[] = "Unknown error";

    if (err < 0)
        return unknown;

    int num;
#ifdef HAVE_FLS
    num = fls(err);
#else
    /* Tiger compatibility, see #42186 */
    num = 0;
    while (err > 0) {
        err >>= 1;
        num++;
    }
#endif

    if ((size_t) num >= sizeof(errors) / sizeof(errors[0]))
        return unknown;

    return errors[num];
}
569 
570