1 /*
2 * -*- coding: utf-8; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:filetype=c:et:sw=4:ts=4:sts=4:tw=100
3 * libmachista.c
4 * $Id: libmachista.c 120067 2014-05-14 22:18:53Z cal@macports.org $
5 *
6 * Copyright (c) 2011 The MacPorts Project
7 * Copyright (c) 2011 Landon Fuller <landonf@macports.org>
8 * Copyright (c) 2011 Clemens Lang <cal@macports.org>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifdef HAVE_CONFIG_H
34 #include <pkg_config.h>
35 #endif
36
37 /* required for asprintf(3) on OS X */
38 #define _DARWIN_C_SOURCE
39 /* required for asprintf(3) on Linux */
40 #define _GNU_SOURCE
41
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45
46 #include <fcntl.h>
47 #include <sys/mman.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include <err.h>
52 #include <string.h>
53 #include <strings.h>
54
55 #ifdef __MACH__
56 #include <mach-o/fat.h>
57 #include <mach-o/loader.h>
58
59 #include <libkern/OSAtomic.h>
60 #endif
61
62 #include "libmachista.h"
63 #include "hashmap.h"
64
65 #ifdef __MACH__
66 /* Tiger compatibility */
67 #ifndef LC_RPATH
68 #define LC_RPATH (0x1c | LC_REQ_DYLD) /* runpath additions */
/*
 * Fallback definition for SDKs whose <mach-o/loader.h> does not provide LC_RPATH.
 *
 * The rpath_command contains a path which at runtime should be added to
 * the current run path used to find @rpath prefixed dylibs.
 */
struct rpath_command {
    uint32_t cmd; /* LC_RPATH */
    uint32_t cmdsize; /* includes string */
    union lc_str path; /* path to add to run path */
};
78 #endif
79 #ifndef LC_REEXPORT_DYLIB
80 #define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
81 #endif
82 #endif /* __MACH__ */
83
/* A read-only window of bytes that is being parsed. All reads done by the parser are checked
 * against this window (see macho_read() and macho_offset()). */
typedef struct macho_input {
    const void *data; /* first byte of the window */
    size_t length; /* number of bytes available starting at data */
} macho_input_t;
88
/* This is macho_handle_t. The corresponding typedef is in the header */
struct macho_handle {
    /* Cache of parse results; macho_parse_file() looks up and stores entries using the file path
     * as key */
    HashMap *result_map;
};
93
94 #ifdef __MACH__
95 /* Verify that the given range is within bounds. */
macho_read(macho_input_t * input,const void * address,size_t length)96 static const void *macho_read (macho_input_t *input, const void *address, size_t length) {
97 if ((((uint8_t *) address) - ((uint8_t *) input->data)) + length > input->length) {
98 // warnx("Short read parsing Mach-O input");
99 return NULL;
100 }
101
102 return address;
103 }
104
105 /* Verify that address + offset + length is within bounds. */
macho_offset(macho_input_t * input,const void * address,size_t offset,size_t length)106 static const void *macho_offset (macho_input_t *input, const void *address, size_t offset, size_t length) {
107 void *result = ((uint8_t *) address) + offset;
108 return macho_read(input, result, length);
109 }
110 #endif
111
/* Return a human readable formatted version number ("major.minor.patch").
 *
 * version - packed dylib version: bits 31..16 are the major, bits 15..8 the minor and
 *           bits 7..0 the patch component
 *
 * Returns a newly allocated string that must be free()'d by the caller, or NULL if the string
 * could not be allocated or formatted. */
char *macho_format_dylib_version (uint32_t version) {
    /* The longest possible result is "65535.255.255"; sizeof includes the terminating NUL */
    const size_t bufsize = sizeof("65535.255.255");

    char *result = malloc(bufsize);
    if (result == NULL) {
        return NULL;
    }

    int written = snprintf(result, bufsize, "%" PRIu32 ".%" PRIu32 ".%" PRIu32,
                           (version >> 16) & 0xFFFF, (version >> 8) & 0xFF, version & 0xFF);
    if (written < 0 || (size_t) written >= bufsize) {
        free(result);
        return NULL;
    }

    return result;
}
118
119 #ifdef __MACH__
macho_get_arch_name(cpu_type_t cputype)120 const char *macho_get_arch_name (cpu_type_t cputype) {
121 const NXArchInfo *archInfo = NXGetArchInfoFromCpuType(cputype, CPU_SUBTYPE_MULTIPLE);
122 if (!archInfo) {
123 return NULL;
124 }
125 return archInfo->name;
126 #else
127 const char *macho_get_arch_name (cpu_type_t cputype UNUSED) {
128 return NULL;
129 #endif
130 }
131
132 #ifdef __MACH__
/* Byteswap wrapper used for files whose byte order differs from the host's: returns input
 * passed through OSSwapInt32(). Has the same signature as macho_nswap32() so that either one
 * can be selected through a function pointer. */
static uint32_t macho_swap32 (uint32_t input) {
    const uint32_t swapped = OSSwapInt32(input);
    return swapped;
}
137
/* "No swap" counterpart of macho_swap32(): hands back input unchanged. Used when the file
 * already has the byte order of the host. */
static uint32_t macho_nswap32(uint32_t input) {
    const uint32_t unchanged = input;
    return unchanged;
}
141
142 /* Creates a new macho_t.
143 * Returns NULL on failure or a pointer to a 0-initialized macho_t on success */
144 static macho_t *create_macho_t (void) {
145 macho_t *mt = malloc(sizeof(macho_t));
146 if (mt == NULL)
147 return NULL;
148
149 memset(mt, 0, sizeof(macho_t));
150 return mt;
151 }
152
153 /* Creates a new macho_arch_t.
154 * Returns NULL on failure or a pointer to a 0-initialized macho_arch_t on success */
155 static macho_arch_t *create_macho_arch_t (void) {
156 macho_arch_t *mat = malloc(sizeof(macho_arch_t));
157 if (mat == NULL)
158 return NULL;
159
160 memset(mat, 0, sizeof(macho_arch_t));
161 return mat;
162 }
163
164 /* Creates a new macho_loadcmd_t.
165 * Returns NULL on failure or a pointer to a 0-initialized macho_loadcmd_t on success */
166 static macho_loadcmd_t *create_macho_loadcmd_t (void) {
167 macho_loadcmd_t *mlt = malloc(sizeof(macho_loadcmd_t));
168 if (mlt == NULL)
169 return NULL;
170
171 memset(mlt, 0, sizeof(macho_loadcmd_t));
172 return mlt;
173 }
174 #endif
175
176 /* Frees a previously allocated macho_loadcmd_t and all it's associated resources */
177 static void free_macho_loadcmd_t (macho_loadcmd_t *mlt) {
178 if (mlt == NULL)
179 return;
180
181 free(mlt->mlt_install_name);
182 free(mlt);
183 }
184
185 /* Frees a previously allocated macho_arch_t and all it's associated resources */
186 static void free_macho_arch_t (macho_arch_t *mat) {
187 if (mat == NULL)
188 return;
189
190 macho_loadcmd_t *current = mat->mat_loadcmds;
191 while (current != NULL) {
192 macho_loadcmd_t *freeme = current;
193 current = current->next;
194 free_macho_loadcmd_t(freeme);
195 }
196
197 free(mat->mat_install_name);
198 free(mat->mat_rpath);
199 free(mat);
200 }
201
202 /* Frees a previously allocated macho_t and all it's associated resources */
203 static void free_macho_t (macho_t *mt) {
204 if (mt == NULL)
205 return;
206
207 macho_arch_t *current = mt->mt_archs;
208 while (current != NULL) {
209 macho_arch_t *freeme = current;
210 current = current->next;
211 free_macho_arch_t(freeme);
212 }
213
214 free(mt);
215 }
216
217 #ifdef __MACH__
218 /* Creates a new element in the architecture list of a macho_t (mt_archs), increases the counter of
219 * architectures (mt_arch_count) and returns a pointer to the newly allocated element or NULL on
220 * error */
221 static macho_arch_t *macho_archlist_append (macho_t *mt) {
222 macho_arch_t *old_head = mt->mt_archs;
223
224 macho_arch_t *new_head = create_macho_arch_t();
225 if (new_head == NULL)
226 return NULL;
227 new_head->next = old_head;
228 mt->mt_archs = new_head;
229
230 return mt->mt_archs;
231 }
232
233 /* Creates a new element in the load command list of a macho_arch_t (mat_loadcmds), increases the
234 * counter of load commands (mat_loadcmd_count) and returns a pointer to the newly allocated element
235 * or NULL on error */
236 static macho_loadcmd_t *macho_loadcmdlist_append (macho_arch_t *mat) {
237 macho_loadcmd_t *old_head = mat->mat_loadcmds;
238
239 macho_loadcmd_t *new_head = create_macho_loadcmd_t();
240 if (new_head == NULL)
241 return NULL;
242 new_head->next = old_head;
243 mat->mat_loadcmds = new_head;
244
245 return mat->mat_loadcmds;
246 }
247 #endif
248
249 /* Parse a Mach-O header */
250 #ifdef __MACH__
251 static int parse_macho (macho_t *mt, macho_input_t *input) {
252 /* Read the file type. */
253 const uint32_t *magic = macho_read(input, input->data, sizeof(uint32_t));
254 if (magic == NULL)
255 return MACHO_ERANGE;
256
257 /* Parse the Mach-O header */
258 bool universal = false;
259 uint32_t (*swap32)(uint32_t) = macho_nswap32;
260
261 const struct mach_header *header;
262 const struct mach_header_64 *header64;
263 size_t header_size;
264 const struct fat_header *fat_header;
265
266 macho_arch_t *mat = NULL;
267 switch (*magic) {
268 case MH_CIGAM:
269 swap32 = macho_swap32;
270 // Fall-through
271
272 case MH_MAGIC:
273
274 header_size = sizeof(*header);
275 header = macho_read(input, input->data, header_size);
276 if (header == NULL)
277 return MACHO_ERANGE;
278 mat = macho_archlist_append(mt);
279 if (mat == NULL)
280 return MACHO_EMEM;
281
282 /* 32-bit Mach-O */
283 mat->mat_cputype = swap32(header->cputype);
284 mat->mat_cpusubtype = swap32(header->cpusubtype);
285 break;
286
287
288 case MH_CIGAM_64:
289 swap32 = macho_swap32;
290 // Fall-through
291
292 case MH_MAGIC_64:
293 header_size = sizeof(*header64);
294 header64 = macho_read(input, input->data, sizeof(*header64));
295 if (header64 == NULL)
296 return MACHO_ERANGE;
297 mat = macho_archlist_append(mt);
298 if (mat == NULL)
299 return MACHO_EMEM;
300
301 /* The 64-bit header is a direct superset of the 32-bit header */
302 header = (struct mach_header *) header64;
303
304 /* 64-bit Macho-O */
305 mat->mat_cputype = swap32(header->cputype);
306 mat->mat_cpusubtype = swap32(header->cpusubtype);
307 break;
308
309 case FAT_CIGAM:
310 case FAT_MAGIC:
311 fat_header = macho_read(input, input->data, sizeof(*fat_header));
312 universal = true;
313 /* Universal binary */
314 break;
315
316 default:
317 /* Unknown binary type */
318 //warnx("Unknown Mach-O magic: 0x%" PRIx32 "", *magic);
319 return MACHO_EMAGIC;
320 }
321
322 /* Parse universal file. */
323 if (universal) {
324 uint32_t nfat = OSSwapBigToHostInt32(fat_header->nfat_arch);
325 const struct fat_arch *archs = macho_offset(input, fat_header, sizeof(struct fat_header), sizeof(struct fat_arch));
326 if (archs == NULL)
327 return MACHO_ERANGE;
328
329 for (uint32_t i = 0; i < nfat; i++) { // foreach architecture
330 const struct fat_arch *arch = macho_read(input, archs + i, sizeof(struct fat_arch));
331 if (arch == NULL)
332 return MACHO_ERANGE;
333
334 /* Fetch a pointer to the architecture's Mach-O header. */
335 macho_input_t arch_input;
336 arch_input.length = OSSwapBigToHostInt32(arch->size);
337 arch_input.data = macho_offset(input, input->data, OSSwapBigToHostInt32(arch->offset), arch_input.length);
338 if (arch_input.data == NULL)
339 return MACHO_ERANGE;
340
341 /* Parse the architecture's Mach-O header */
342 int res = parse_macho(mt, &arch_input);
343 if (res != MACHO_SUCCESS)
344 return res;
345 }
346
347 return MACHO_SUCCESS;
348 }
349
350 /* Copy the architecture */
351 mat->mat_cputype = swap32(header->cputype);
352 mat->mat_cpusubtype = swap32(header->cpusubtype);
353
354 /* Parse the Mach-O load commands */
355 uint32_t ncmds = swap32(header->ncmds);
356
357 /* Setup to jump over the header on the first pass through instead of the previous command */
358 const struct load_command *cmd = (void *)header;
359 uint32_t cmdsize = header_size;
360
361 /* Iterate over the load commands */
362 for (uint32_t i = 0; i < ncmds; i++) {
363 /* Load the next command */
364 cmd = macho_offset(input, cmd, cmdsize, sizeof(struct load_command));
365 if (cmd == NULL)
366 return MACHO_ERANGE;
367
368 /* Load the full command */
369 cmdsize = swap32(cmd->cmdsize);
370 cmd = macho_read(input, cmd, cmdsize);
371 if (cmd == NULL)
372 return MACHO_ERANGE;
373
374 /* Handle known types */
375 uint32_t cmd_type = swap32(cmd->cmd);
376 switch (cmd_type) {
377 case LC_RPATH: {
378 /* Copy the rpath */
379 if (cmdsize < sizeof(struct rpath_command)) {
380 //warnx("Incorrect cmd size");
381 return MACHO_ERANGE;
382 }
383
384 size_t pathlen = cmdsize - sizeof(struct rpath_command);
385 const void *pathptr = macho_offset(input, cmd, sizeof(struct rpath_command), pathlen);
386 if (pathptr == NULL)
387 return MACHO_ERANGE;
388
389 mat->mat_rpath = malloc(pathlen);
390 if (mat->mat_rpath == NULL)
391 return MACHO_EMEM;
392 strlcpy(mat->mat_rpath, pathptr, pathlen);
393 break;
394 }
395
396 case LC_ID_DYLIB:
397 case LC_LOAD_WEAK_DYLIB:
398 case LC_REEXPORT_DYLIB:
399 case LC_LOAD_DYLIB: {
400 const struct dylib_command *dylib_cmd = (const struct dylib_command *) cmd;
401
402 /* Extract the install name */
403 if (cmdsize < sizeof(struct dylib_command)) {
404 //warnx("Incorrect name size");
405 return MACHO_ERANGE;
406 }
407
408 size_t namelen = cmdsize - sizeof(struct dylib_command);
409 const void *nameptr = macho_offset(input, cmd, sizeof(struct dylib_command), namelen);
410 if (nameptr == NULL)
411 return MACHO_ERANGE;
412
413 if (cmd_type == LC_ID_DYLIB) {
414 /* Copy install name */
415 mat->mat_install_name = malloc(namelen);
416 if (mat->mat_install_name == NULL)
417 return MACHO_EMEM;
418 strlcpy(mat->mat_install_name, nameptr, namelen);
419
420 /* Copy version numbers (raw, for easier comparison) */
421 mat->mat_version = swap32(dylib_cmd->dylib.current_version);
422 mat->mat_comp_version = swap32(dylib_cmd->dylib.compatibility_version);
423 } else {
424 /* Append loadcmd to list of loadcommands */
425 macho_loadcmd_t *mlt = macho_loadcmdlist_append(mat);
426 if (mlt == NULL)
427 return MACHO_EMEM;
428
429 /* Copy install name */
430 mlt->mlt_install_name = malloc(namelen);
431 if (mlt->mlt_install_name == NULL)
432 return MACHO_EMEM;
433 strlcpy(mlt->mlt_install_name, nameptr, namelen);
434
435 /* Copy version numbers (raw, for easier comparison) */
436 mlt->mlt_version = swap32(dylib_cmd->dylib.current_version);
437 mlt->mlt_comp_version = swap32(dylib_cmd->dylib.compatibility_version);
438
439 /* Copy command type */
440 mlt->mlt_type = cmd_type;
441 }
442 break;
443 }
444
445 default:
446 break;
447 }
448 }
449
450 return MACHO_SUCCESS;
451 }
452 #endif
453
454 /* Parse a (possible Mach-O) file. For a more detailed description, see the header */
455 #ifdef __MACH__
456 int macho_parse_file(macho_handle_t *handle, const char *filepath, const macho_t **res) {
457 int fd;
458 struct stat st;
459 void *data;
460 macho_input_t input_file;
461
462 /* Check hashmap for precomputed results */
463 const macho_t *cached_res = hashMapGet(handle->result_map, filepath);
464 if (cached_res != NULL) {
465 *res = cached_res;
466 return MACHO_SUCCESS;
467 }
468
469
470 /* Open input file */
471 if ((fd = open(filepath, O_RDONLY)) < 0) {
472 return MACHO_EFILE;
473 }
474
475 /* Get file length */
476 if (fstat(fd, &st) != 0) {
477 close(fd);
478 return MACHO_EFILE;
479 }
480
481 /* Map file into address space */
482 if ((data = mmap(NULL, st.st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0)) == MAP_FAILED) {
483 close(fd);
484 return MACHO_EMMAP;
485 }
486
487 /* Parse file */
488 input_file.data = data;
489 input_file.length = st.st_size;
490
491 *res = create_macho_t();
492 if (*res == NULL)
493 return MACHO_EMEM;
494
495 /* The output parameter *res should be read-only for the user of the lib only, but writable for
496 * us */
497 int ret = parse_macho((macho_t *)*res, &input_file);
498 if (ret == MACHO_SUCCESS) {
499 /* Insert into hashmap for caching */
500 if (0 == hashMapPut(handle->result_map, filepath, *res, NULL)) {
501 free_macho_t((macho_t *)*res);
502 *res = NULL;
503 ret = MACHO_EMEM;
504 }
505 } else {
506 /* An error occured, free mt */
507 free_macho_t((macho_t *)*res);
508 *res = NULL;
509 }
510
511 /* Cleanup */
512 munmap(data, st.st_size);
513 close(fd);
514
515 return ret;
516 #else
517 int macho_parse_file(macho_handle_t *handle UNUSED, const char *filepath UNUSED, const macho_t **res UNUSED) {
518 return 0;
519 #endif
520 }
521
522 /* Create a new macho_handle_t. More information on this function is available in the header */
523 macho_handle_t *macho_create_handle (void) {
524 macho_handle_t *mht = malloc(sizeof(macho_handle_t));
525 if (mht == NULL)
526 return NULL;
527 mht->result_map = hashMapCreate((void (*)(const void *))free_macho_t);
528 if (mht->result_map == NULL) {
529 free(mht);
530 return NULL;
531 }
532 return mht;
533 }
534
535 /* Release a macho_handle_t. For more documentation, see the header */
536 void macho_destroy_handle(macho_handle_t *handle) {
537 if (handle == NULL)
538 return;
539
540 hashMapDestroy(handle->result_map);
541
542 free(handle);
543 }
544
/* Returns string representation of the MACHO_* error code constants.
 *
 * err - 0 for success, or an error code with a single bit set (0x01 .. 0x10). If several bits
 *       are set, the message belonging to the highest one is returned.
 *
 * Returns a pointer to a statically allocated string that must not be modified or freed.
 * Negative values and values beyond the known error codes yield "Unknown error" instead of
 * reading outside of the message table. */
const char *macho_strerror(int err) {
    static const char *const errors[] = {
        /* 0x00 */ "Success",
        /* 0x01 */ "Error opening or reading file",
        /* 0x02 */ "Error mapping file into memory",
        /* 0x04 */ "Error allocating memory",
        /* 0x08 */ "Premature end of data, possibly corrupt file",
        /* 0x10 */ "Not a Mach-O file",
    };
    static const char unknown[] = "Unknown error";

    if (err < 0)
        return unknown;

    /* num is the 1-based position of the highest bit set in err (0 if err is 0), which is
     * also the index of the corresponding message */
    int num;
#ifdef HAVE_FLS
    num = fls(err);
#else
    /* Tiger compatibility, see #42186 */
    num = 0;
    while (err > 0) {
        err >>= 1;
        num++;
    }
#endif

    if ((size_t) num >= sizeof(errors) / sizeof(errors[0]))
        return unknown;

    return errors[num];
}
569
570