1 /*
2  *  Copyright (C) 2005-2009  Anders Gavare.  All rights reserved.
3  *
4  *  Redistribution and use in source and binary forms, with or without
5  *  modification, are permitted provided that the following conditions are met:
6  *
7  *  1. Redistributions of source code must retain the above copyright
8  *     notice, this list of conditions and the following disclaimer.
9  *  2. Redistributions in binary form must reproduce the above copyright
10  *     notice, this list of conditions and the following disclaimer in the
11  *     documentation and/or other materials provided with the distribution.
12  *  3. The name of the author may not be used to endorse or promote products
13  *     derived from this software without specific prior written permission.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  *  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  *  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  *  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  *  SUCH DAMAGE.
26  *
27  *
28  *  COMMENT: Mach-O file support
29  */
30 
31 /*  Note: Included from file.c.  */
32 
33 
34 /*
35  *  file_load_macho():
36  *
37  *  Loads a Mach-O binary image into the emulated memory. The entry point
38  *  is stored in the specified CPU's registers.
39  *
40  *  TODO:
41  *
42  *	o)  Almost everything.
43  *
44  *	o)  I haven't had time to look into whether Apple's open source
45  *	    license is BSD-compatible or not. Perhaps it would be possible
46  *	    to use a header file containing symbolic names, and not use
47  *	    hardcoded values.
48  */
file_load_macho(struct machine * m,struct memory * mem,char * filename,uint64_t * entrypointp,int arch,int * byte_orderp,int is_64bit,int is_reversed)49 static void file_load_macho(struct machine *m, struct memory *mem,
50 	char *filename, uint64_t *entrypointp, int arch, int *byte_orderp,
51 	int is_64bit, int is_reversed)
52 {
53 	FILE *f;
54 	uint64_t entry = 0;
55 	int entry_set = 0;
56 	int encoding = ELFDATA2MSB;
57 	unsigned char buf[65536];
58 	char *symbols, *strings;
59 	uint32_t cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags;
60 	uint64_t vmaddr, vmsize, fileoff, filesize;
61 	int cmd_type, cmd_len, i, flavor;
62 	int32_t symoff, nsyms, stroff, strsize;
63 	size_t len, pos;
64 
65 	if (m->cpus[0]->byte_order == EMUL_BIG_ENDIAN)
66 		encoding = ELFDATA2MSB;
67 
68 	f = fopen(filename, "r");
69 	if (f == NULL) {
70 		perror(filename);
71 		exit(1);
72 	}
73 
74 	if (is_64bit) {
75 		fatal("TODO: 64-bit Mach-O. Not supported yet.\n");
76 		exit(1);
77 	}
78 	if (is_reversed) {
79 		fatal("TODO: Reversed-endianness. Not supported yet.\n");
80 		exit(1);
81 	}
82 
83 	len = fread(buf, 1, sizeof(buf), f);
84 	if (len < 100) {
85 		fatal("Bad Mach-O file?\n");
86 		exit(1);
87 	}
88 
89 	unencode(cputype,    &buf[4], uint32_t);
90 	unencode(cpusubtype, &buf[8], uint32_t);
91 	unencode(filetype,   &buf[12], uint32_t);
92 	unencode(ncmds,      &buf[16], uint32_t);
93 	unencode(sizeofcmds, &buf[20], uint32_t);
94 	unencode(flags,      &buf[24], uint32_t);
95 
96 	/*  debug("cputype=0x%x cpusubtype=0x%x filetype=0x%x\n",
97 	    cputype, cpusubtype, filetype);
98 	    debug("ncmds=%i sizeofcmds=0x%08x flags=0x%08x\n",
99 	    ncmds, sizeofcmds, flags);  */
100 
101 	/*
102 	 *  Compare to "normal" values.
103 	 *  NOTE/TODO: These were for a Darwin (Macintosh PPC) kernel.
104 	 */
105 	if (cputype != 0x12) {
106 		fatal("Error: Unimplemented cputype 0x%x\n", cputype);
107 		exit(1);
108 	}
109 	if (cpusubtype != 0) {
110 		fatal("Error: Unimplemented cpusubtype 0x%x\n", cpusubtype);
111 		exit(1);
112 	}
113 	/*  Filetype 2 means an executable image.  */
114 	if (filetype != 2) {
115 		fatal("Error: Unimplemented filetype 0x%x\n", filetype);
116 		exit(1);
117 	}
118 	if (!(flags & 1)) {
119 		fatal("Error: File has 'undefined references'. Cannot"
120 		    " be executed.\n", flags);
121 		exit(1);
122 	}
123 
124 	/*  I've only encountered flags == 1 so far.  */
125 	if (flags != 1) {
126 		fatal("Error: Unimplemented flags 0x%x\n", flags);
127 		exit(1);
128 	}
129 
130 	/*
131 	 *  Read all load commands:
132 	 */
133 	pos = is_64bit? 32 : 28;
134 	cmd_type = 0;
135 	do {
136 		/*  Read command type and length:  */
137 		unencode(cmd_type, &buf[pos], uint32_t);
138 		unencode(cmd_len,  &buf[pos+4], uint32_t);
139 
140 #if 0
141 		debug("cmd %i, len=%i\n", cmd_type, cmd_len);
142 		for (i=8; i<cmd_len; i++) {
143 			unsigned char ch = buf[pos+i];
144 			if (ch >= ' ' && ch < 127)
145 				debug("%c", ch);
146 			else
147 				debug(".");
148 		}
149 #endif
150 		switch (cmd_type) {
151 		case 1:	/*  LC_SEGMENT  */
152 			debug("seg ");
153 			for (i=0; i<16; i++) {
154 				if (buf[pos + 8 + i] == 0)
155 					break;
156 				debug("%c", buf[pos + 8 + i]);
157 			}
158 			unencode(vmaddr,   &buf[pos+8+16+0], uint32_t);
159 			unencode(vmsize,   &buf[pos+8+16+4], uint32_t);
160 			unencode(fileoff,  &buf[pos+8+16+8], uint32_t);
161 			unencode(filesize, &buf[pos+8+16+12], uint32_t);
162 			debug(": vmaddr=0x%x size=0x%x fileoff=0x%x",
163 			    (int)vmaddr, (int)vmsize, (int)fileoff);
164 
165 			if (filesize == 0) {
166 				debug("\n");
167 				break;
168 			}
169 
170 			fseek(f, fileoff, SEEK_SET);
171 
172 			/*  Load data from the file:  */
173 			while (filesize != 0) {
174 				unsigned char buf2[32768];
175 				ssize_t lenRead = filesize > sizeof(buf2) ?
176 				    sizeof(buf2) : filesize;
177 				lenRead = fread(buf2, 1, lenRead, f);
178 
179 				/*  printf("fread len=%i vmaddr=%x buf[0..]="
180 				    "%02x %02x %02x\n", (int)len, (int)vmaddr,
181 				    buf2[0], buf2[1], buf2[2]);  */
182 
183 				if (lenRead > 0) {
184 					int len2 = 0;
185 					uint64_t vaddr1 = vmaddr &
186 					    ((1 << BITS_PER_MEMBLOCK) - 1);
187 					uint64_t vaddr2 = (vmaddr +
188 					    lenRead) & ((1 << BITS_PER_MEMBLOCK)-1);
189 					if (vaddr2 < vaddr1) {
190 						len2 = lenRead - vaddr2;
191 						m->cpus[0]->memory_rw(m->cpus[
192 						    0], mem, vmaddr, &buf2[0],
193 						    len2, MEM_WRITE,
194 						    NO_EXCEPTIONS);
195 					}
196 					m->cpus[0]->memory_rw(m->cpus[0], mem,
197 					    vmaddr + len2, &buf2[len2], lenRead-len2,
198 					    MEM_WRITE, NO_EXCEPTIONS);
199 				} else {
200 					fprintf(stderr, "error reading\n");
201 					exit(1);
202 				}
203 
204 				vmaddr += lenRead;
205 				filesize -= lenRead;
206 			}
207 
208 			debug("\n");
209 			break;
210 
211 		case 2:	/*  LC_SYMTAB  */
212 			unencode(symoff,  &buf[pos+8], uint32_t);
213 			unencode(nsyms,   &buf[pos+12], uint32_t);
214 			unencode(stroff,  &buf[pos+16], uint32_t);
215 			unencode(strsize, &buf[pos+20], uint32_t);
216 			debug("symtable: %i symbols @ 0x%x (strings at "
217 			    "0x%x)\n", nsyms, symoff, stroff);
218 
219 			CHECK_ALLOCATION(symbols = (char *) malloc(12 * nsyms));
220 			fseek(f, symoff, SEEK_SET);
221 			if (fread(symbols, 1, 12 * nsyms, f) != (size_t) 12*nsyms) {
222 				fprintf(stderr, "could not read symbols from %s\n", filename);
223 				exit(1);
224 			}
225 
226 			CHECK_ALLOCATION(strings = (char *) malloc(strsize));
227 			fseek(f, stroff, SEEK_SET);
228 			if (fread(strings, 1, strsize, f) != (size_t) strsize) {
229 				fprintf(stderr, "could not read symbol strings from %s\n", filename);
230 				exit(1);
231 			}
232 
233 			for (i=0; i<nsyms; i++) {
234 				int n_strx, n_type, n_sect, n_desc;
235 				uint32_t n_value;
236 				unencode(n_strx,  &symbols[i*12+0], int32_t);
237 				unencode(n_type,  &symbols[i*12+4], uint8_t);
238 				unencode(n_sect,  &symbols[i*12+5], uint8_t);
239 				unencode(n_desc,  &symbols[i*12+6], int16_t);
240 				unencode(n_value, &symbols[i*12+8], uint32_t);
241 				/*  debug("%i: strx=%i type=%i sect=%i desc=%i"
242 				    " value=0x%x\n", i, n_strx, n_type,
243 				    n_sect, n_desc, n_value);  */
244 				add_symbol_name(&m->symbol_context,
245 				    n_value, 0, strings + n_strx, 0, -1);
246 			}
247 
248 			free(symbols);
249 			free(strings);
250 			break;
251 
252 		case 5:	debug("unix thread context: ");
253 			/*  See http://cvs.sf.net/viewcvs.py/hte/
254 			    HT%20Editor/machostruc.h or similar for details
255 			    on the thread struct.  */
256 			unencode(flavor, &buf[pos+8], uint32_t);
257 			if (flavor != 1) {
258 				fatal("unimplemented flavor %i\n", flavor);
259 				exit(1);
260 			}
261 
262 			if (arch != ARCH_PPC) {
263 				fatal("non-PPC arch? TODO\n");
264 				exit(1);
265 			}
266 
267 			unencode(entry, &buf[pos+16], uint32_t);
268 			entry_set = 1;
269 			debug("pc=0x%x\n", (int)entry);
270 
271 			for (i=1; i<40; i++) {
272 				uint32_t x;
273 				unencode(x, &buf[pos+16+i*4], uint32_t);
274 				if (x != 0) {
275 					fatal("Entry nr %i in the Mach-O"
276 					    " thread struct is non-zero"
277 					    " (0x%x). This is not supported"
278 					    " yet. TODO\n", i, x);
279 					exit(1);
280 				}
281 			}
282 			break;
283 
284 		default:fatal("WARNING! Unimplemented load command %i!\n",
285 			    cmd_type);
286 		}
287 
288 		pos += cmd_len;
289 	} while (pos < sizeofcmds && cmd_type != 0);
290 
291 	fclose(f);
292 
293 	if (!entry_set) {
294 		fatal("No entry point? Aborting.\n");
295 		exit(1);
296 	}
297 
298 	*entrypointp = entry;
299 
300 	if (encoding == ELFDATA2LSB)
301 		*byte_orderp = EMUL_LITTLE_ENDIAN;
302 	else
303 		*byte_orderp = EMUL_BIG_ENDIAN;
304 
305 	n_executables_loaded ++;
306 }
307 
308