1 /**************************** mac2asm.cpp *********************************
2 * Author: Agner Fog
3 * Date created: 2007-05-24
4 * Last modified: 2008-05-12
5 * Project: objconv
6 * Module: mac2asm.cpp
7 * Description:
8 * Module for disassembling Mach-O files
9 *
10 * Copyright 2007-2008 GNU General Public License http://www.gnu.org/licenses
11 *****************************************************************************/
12 #include "stdafx.h"
13
14 // Constructor
15 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
CMAC2ASM()16 CMAC2ASM<MACSTRUCTURES>::CMAC2ASM() {
17 }
18
19 // Convert
20 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
Convert()21 void CMAC2ASM<MACSTRUCTURES>::Convert() {
22 // Do the conversion
23
24 // Check cpu type
25 switch (this->FileHeader.cputype) {
26 case MAC_CPU_TYPE_I386:
27 this->WordSize = 32; break;
28
29 case MAC_CPU_TYPE_X86_64:
30 this->WordSize = 64; break;
31
32 default:
33 // Wrong type
34 err.submit(2011, ""); return;
35 }
36
37 // check object/executable file type
38 uint32_t ExeType; // File type: 0 = object, 1 = position independent shared object, 2 = executable
39
40 switch (this->FileHeader.filetype) {
41 case MAC_OBJECT: // Relocatable object file
42 ExeType = 0; break;
43
44 case MAC_FVMLIB: // fixed VM shared library file
45 case MAC_DYLIB: // dynamicly bound shared library file
46 case MAC_BUNDLE: // part of universal binary
47 ExeType = 1; break;
48
49 case MAC_EXECUTE: // demand paged executable file
50 case MAC_CORE: // core file
51 case MAC_PRELOAD: // preloaded executable file
52 ExeType = 2; break;
53
54 default: // Other types
55 err.submit(2011, ""); return;
56 }
57
58 // Tell disassembler
59 // Disasm.Init(ExeType, this->ImageBase);
60 Disasm.Init(ExeType, 0);
61
62 // Make Sections list and relocations list
63 MakeSectionList();
64
65 // Make Symbols list in Disasm
66 MakeSymbolList();
67
68 // Make relocations list in Disasm
69 MakeRelocations();
70
71 // Make symbol entries for imported symbols
72 MakeImports();
73
74 Disasm.Go(); // Disassemble
75
76 *this << Disasm.OutFile; // Take over output file from Disasm
77 }
78
79 // MakeSectionList
80
81 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeSectionList()82 void CMAC2ASM<MACSTRUCTURES>::MakeSectionList() {
83 // Make Sections list and Relocations list in Disasm
84
85 uint32_t icmd; // Command index
86 int32_t isec1; // Section index within segment
87 int32_t isec2 = 0; // Section index global
88 int32_t nsect; // Number of sections in segment
89 uint32_t cmd; // Load command
90 uint32_t cmdsize; // Command size
91
92 StringBuffer.Push(0, 1); // Initialize string buffer
93
94 // Pointer to current position
95 uint8_t * currentp = (uint8_t*)(this->Buf() + sizeof(TMAC_header));
96
97 // Loop through file commands
98 for (icmd = 1; icmd <= this->FileHeader.ncmds; icmd++) {
99 cmd = ((MAC_load_command*)currentp) -> cmd;
100 cmdsize = ((MAC_load_command*)currentp) -> cmdsize;
101
102 if (cmd == MAC_LC_SEGMENT || cmd == MAC_LC_SEGMENT_64) {
103 // This is a segment command
104 if ((this->WordSize == 64) ^ (cmd == MAC_LC_SEGMENT_64)) {
105 // Inconsistent word size
106 err.submit(2320); break;
107 }
108
109 // Number of sections in segment
110 nsect = ((TMAC_segment_command*)currentp) -> nsects;
111
112 // Find first section header
113 TMAC_section * sectp = (TMAC_section*)(currentp + sizeof(TMAC_segment_command));
114
115 // Loop through section headers
116 for (isec1 = 1; isec1 <= nsect; isec1++, sectp++) {
117
118 if (sectp->offset >= this->GetDataSize()) {
119 // points outside file
120 err.submit(2035); break;
121 }
122
123 // Get section properties
124 isec2++; // Section number
125 uint32_t MacSectionType = sectp->flags & MAC_SECTION_TYPE;
126 uint8_t * Buffer = (uint8_t*)(this->Buf()) + sectp->offset;
127 uint32_t TotalSize = (uint32_t)sectp->size;
128 uint32_t InitSize = TotalSize;
129 if (MacSectionType == MAC_S_ZEROFILL) InitSize = 0;
130 uint32_t SectionAddress = (uint32_t)sectp->addr;
131 uint32_t Align = sectp->align;
132
133 // Get section type
134 // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
135 uint32_t Type = 0;
136 if (sectp->flags & (MAC_S_ATTR_PURE_INSTRUCTIONS | MAC_S_ATTR_SOME_INSTRUCTIONS)) {
137 Type = 1; // code
138 }
139 else if (MacSectionType == MAC_S_ZEROFILL) {
140 Type = 3; // uninitialized data
141 }
142 else {
143 Type = 2; // data or anything else
144 }
145
146 // Make section name by combining segment name and section name
147 uint32_t NameOffset = StringBuffer.Push(sectp->segname, (uint32_t)strlen(sectp->segname)); // Segment name
148 StringBuffer.Push(".", 1); // Separate by dot
149 StringBuffer.PushString(sectp->sectname); // Section name
150 char * Name = (char*)StringBuffer.Buf() + NameOffset;
151
152 // Save section record
153 Disasm.AddSection(Buffer, InitSize, TotalSize, SectionAddress, Type, Align, this->WordSize, Name);
154
155 // Save information about relocation list for this section
156 if (sectp->nreloc) {
157 MAC_SECT_WITH_RELOC RelList = {isec2, sectp->offset, sectp->nreloc, sectp->reloff};
158 RelocationQueue.Push(RelList);
159 }
160
161 // Find import tables
162 if (MacSectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && MacSectionType <= MAC_S_LAZY_SYMBOL_POINTERS /*?*/) {
163 // This is an import table
164 ImportSections.Push(sectp);
165 }
166 // Find literals sections
167 if (MacSectionType == MAC_S_4BYTE_LITERALS || MacSectionType == MAC_S_8BYTE_LITERALS) {
168 // This is a literals section
169 ImportSections.Push(sectp);
170 }
171 }
172 }
173 currentp += cmdsize;
174 }
175 }
176
177 // MakeRelocations
178 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeRelocations()179 void CMAC2ASM<MACSTRUCTURES>::MakeRelocations() {
180 // Make relocations for object and executable files
181 uint32_t iqq; // Index into RelocationQueue = table of relocation tables
182 uint32_t irel; // Index into relocation table
183 int32_t Section; // Section index
184 uint32_t SectOffset; // File offset of section binary data
185 uint32_t NumReloc; // Number of relocations records for this section
186 uint32_t ReltabOffset; // File offset of relocation table for this section
187 uint32_t SourceOffset; // Section-relative offset of relocation source
188 uint32_t SourceSize; // Size of relocation source
189 int32_t Inline = 0; // Inline addend at relocation source
190 uint32_t TargetAddress; // Base-relative address of relocation target
191 uint32_t TargetSymbol; // Symbol index of target
192 //int32_t TargetSection; // Target section
193 int32_t Addend; // Offset to add to target
194 uint32_t ReferenceAddress; // Base-relative address of reference point
195 uint32_t ReferenceSymbol; // Symbol index of reference point
196 uint32_t R_Type; // Relocation type in Mach-O record
197 uint32_t R_Type2; // Relocation type of second entry of a pair
198 uint32_t R_PCRel; // Relocation is self-relative
199 uint32_t RelType = 0; // Relocation type translated to disasm record
200
201 // Loop through RelocationQueue. There is one entry for each relocation table
202 for (iqq = 0; iqq < RelocationQueue.GetNumEntries(); iqq++) {
203 Section = RelocationQueue[iqq].Section; // Section index
204 SectOffset = RelocationQueue[iqq].SectOffset; // File offset of section binary data
205 NumReloc = RelocationQueue[iqq].NumReloc; // Number of relocations records for this section
206 ReltabOffset = RelocationQueue[iqq].ReltabOffset; // File offset of relocation table for this section
207
208 if (NumReloc == 0) continue;
209
210 if (ReltabOffset == 0 || ReltabOffset >= this->GetDataSize() || ReltabOffset + NumReloc*sizeof(MAC_relocation_info) >= this->GetDataSize()) {
211 // Pointer out of range
212 err.submit(2035); return;
213 }
214
215 // pointer to relocation info
216 union {
217 MAC_relocation_info * r;
218 MAC_scattered_relocation_info * s;
219 int8_t * b;
220 } relp;
221 // Point to first relocation entry
222 relp.b = this->Buf() + ReltabOffset;
223
224 // Loop through relocation table
225 for (irel = 0; irel < NumReloc; irel++, relp.r++) {
226
227 // Set defaults
228 ReferenceAddress = ReferenceSymbol = TargetSymbol = Addend = 0;
229
230 if (relp.s->r_scattered) {
231 // scattered relocation entry
232 SourceOffset = relp.s->r_address;
233 SourceSize = 1 << relp.s->r_length;
234 R_PCRel = relp.s->r_pcrel;
235 R_Type = relp.s->r_type;
236 TargetAddress = relp.s->r_value;
237 TargetSymbol = 0;
238 }
239 else {
240 // non-scattered relocation entry
241 SourceOffset = relp.r->r_address;
242 SourceSize = 1 << relp.r->r_length;
243 R_PCRel = relp.r->r_pcrel;
244 R_Type = relp.r->r_type;
245 if (relp.r->r_extern) {
246 TargetSymbol = relp.r->r_symbolnum + 1;
247 }
248 else {
249 //TargetSection = relp.r->r_symbolnum;
250 }
251 TargetAddress = 0;
252 }
253
254 if (this->WordSize == 32 && (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF)) {
255 // This is the first of a pair of relocation entries.
256 // Get second entry containing reference point
257 irel++; relp.r++;
258 if (irel >= NumReloc) {err.submit(2050); break;}
259
260 if (relp.s->r_scattered) {
261 // scattered relocation entry
262 R_Type2 = relp.s->r_type;
263 ReferenceAddress = relp.s->r_value;
264 ReferenceSymbol = 0;
265 }
266 else {
267 // non-scattered relocation entry
268 ReferenceSymbol = relp.r->r_symbolnum + 1;
269 R_Type2 = relp.r->r_type;
270 ReferenceAddress = 0;
271 }
272 if (R_Type2 != MAC32_RELOC_PAIR) {err.submit(2050); break;}
273
274 if (ReferenceSymbol == 0) {
275 // Reference point has no symbol index. Make one
276 ReferenceSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ReferenceAddress, 0, 0, 2, 0, 0);
277 }
278 }
279
280 if (this->WordSize == 64 && R_Type == MAC64_RELOC_SUBTRACTOR) {
281 // This is the first of a pair of relocation entries.
282 // The first entry contains reference point to subtract
283 irel++; relp.r++;
284 if (irel >= NumReloc || relp.s->r_scattered || relp.r->r_type != MAC64_RELOC_UNSIGNED) {
285 err.submit(2050); break;
286 }
287 ReferenceSymbol = TargetSymbol;
288 R_PCRel = relp.r->r_pcrel;
289 if (relp.r->r_extern) {
290 TargetSymbol = relp.r->r_symbolnum + 1;
291 }
292 else {
293 //TargetSection = relp.r->r_symbolnum;
294 }
295 TargetAddress = 0;
296 }
297
298 // Get inline addend or address
299 if (SectOffset + SourceOffset < this->GetDataSize()) {
300 switch (SourceSize) {
301 case 1:
302 Inline = CMemoryBuffer::Get<int8_t>(SectOffset+SourceOffset);
303 // (this->Get<int8_t> doesn't work on Gnu compiler 4.0.1)
304 break;
305 case 2:
306 Inline = CMemoryBuffer::Get<int16_t>(SectOffset+SourceOffset);
307 break;
308 case 4: case 8:
309 Inline = CMemoryBuffer::Get<int32_t>(SectOffset+SourceOffset);
310 break;
311 default:
312 Inline = 0;
313 }
314 }
315
316 if (this->WordSize == 32) {
317 // Calculate target address and addend, 32 bit system
318 if (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF) {
319 // Relative to reference point
320 // Compensate for inline value = TargetAddress - ReferenceAddress;
321 Addend = ReferenceAddress - TargetAddress;
322 }
323 else if (R_PCRel) {
324 // Self-relative
325 TargetAddress += Inline + SourceOffset + SourceSize;
326 Addend = -4 - Inline;
327 }
328 else {
329 // Direct
330 TargetAddress += Inline;
331 Addend = -Inline;
332 }
333 }
334
335 if (TargetSymbol == 0) {
336 // Target has no symbol index. Make one
337 TargetSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, TargetAddress, 0, 0, 2, 0, 0);
338 }
339
340 // Find type
341 if (this->WordSize == 32) {
342 switch (R_Type) {
343 case MAC32_RELOC_VANILLA:
344 // Direct or self-relative
345 RelType = R_PCRel ? 2 : 1;
346 break;
347
348 case MAC32_RELOC_SECTDIFF: case MAC32_RELOC_LOCAL_SECTDIFF:
349 // Relative to reference point
350 RelType = 0x10;
351 break;
352
353 case MAC32_RELOC_PB_LA_PTR:
354 // Lazy pointer
355 RelType = 0x41; //??
356 break;
357
358 default:
359 // Unknown type
360 err.submit(2030, R_Type);
361 break;
362 }
363 }
364 else { // 64-bit relocation types
365 switch (R_Type) {
366 case MAC64_RELOC_UNSIGNED:
367 // Absolute address
368 RelType = 1;
369 break;
370 case MAC64_RELOC_BRANCH:
371 // Signed 32-bit displacement with implicit -4 addend
372 case MAC64_RELOC_SIGNED:
373 // Signed 32-bit displacement with implicit -4 addend
374 case MAC64_RELOC_SIGNED_1:
375 // Signed 32-bit displacement with implicit -4 addend and explicit -1 addend
376 case MAC64_RELOC_SIGNED_2:
377 // Signed 32-bit displacement with implicit -4 addend and explicit -2 addend
378 case MAC64_RELOC_SIGNED_4:
379 // Signed 32-bit displacement with implicit -4 addend and explicit -4 addend
380 RelType = 2; Addend -= 4;
381 break;
382 case MAC64_RELOC_GOT:
383 // Absolute or relative reference to GOT?
384 // RelType = 0x1001; break;
385 case MAC64_RELOC_GOT_LOAD:
386 // Signed 32-bit displacement to GOT
387 RelType = 0x1002; Addend -= 4;
388 break;
389 case MAC64_RELOC_SUBTRACTOR:
390 // 32 or 64 bit relative to arbitrary reference point
391 RelType = 0x10;
392 break;
393 default:
394 // Unknown type
395 err.submit(2030, R_Type);
396 break;
397 }
398 }
399
400 // Make relocation record
401 Disasm.AddRelocation(Section, SourceOffset, Addend,
402 RelType, SourceSize, TargetSymbol, ReferenceSymbol);
403 }
404 }
405 }
406
407 // MakeSymbolList
408 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeSymbolList()409 void CMAC2ASM<MACSTRUCTURES>::MakeSymbolList() {
410 // Make Symbols list in Disasm
411 uint32_t symi; // Symbol index, 0-based
412 uint32_t symn = 0; // Symbol number, 1-based
413 char * Name; // Symbol name
414 int32_t Section; // Section number (1-based). 0 = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
415 uint32_t Offset; // Offset into section. (Value for absolute symbol)
416 uint32_t Type; // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
417 uint32_t Scope; // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
418
419 // pointer to string table
420 char * strtab = (char*)(this->Buf() + this->StringTabOffset);
421
422 // loop through symbol table
423 TMAC_nlist * symp = (TMAC_nlist*)(this->Buf() + this->SymTabOffset);
424 for (symi = 0; symi < this->SymTabNumber; symi++, symp++) {
425
426 if (symp->n_type & MAC_N_STAB) {
427 // Debug symbol. Ignore
428 continue;
429 }
430
431 if (symp->n_strx < this->StringTabSize) {
432 // Normal symbol
433 Section = symp->n_sect;
434 Offset = (uint32_t)symp->n_value;
435 Name = strtab + symp->n_strx;
436 symn = symi + 1; // Convert 0-based to 1-based index
437
438 // Get scope
439 if (symi < this->iextdefsym) {
440 // Local
441 Scope = 2;
442 }
443 else if (Section && (symp->n_type & MAC_N_TYPE) != MAC_N_UNDF) {
444 // Public
445 Scope = 4;
446 }
447 else {
448 // External
449 Scope = 0x20;
450 }
451 // Check if absolute
452 if ((symp->n_type & MAC_N_TYPE) == MAC_N_ABS) {
453 // Absolute
454 Section = ASM_SEGMENT_ABSOLUTE; Scope = 4;
455 }
456 // Check if weak/communal
457 if (symp->n_type & MAC_N_PEXT) {
458 // Communal?
459 Scope = 0x10;
460 }
461 else if (symp->n_desc & MAC_N_WEAK_DEF) {
462 // Weak public
463 Scope = 8;
464 }
465 else if (symp->n_desc & MAC_N_WEAK_REF) {
466 // Weak external (not supported by disassembler)
467 Scope = 0x20;
468 }
469 // Get type
470 Type = 0;
471
472 // Offset is always based, not section-relative
473 if (Section > 0) Section = ASM_SEGMENT_IMGREL;
474
475 // Add symbol to diassembler
476 Disasm.AddSymbol(Section, Offset, 0, Type, Scope, symn, Name);
477 }
478 }
479 }
480
481 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeImports()482 void CMAC2ASM<MACSTRUCTURES>::MakeImports() {
483 // Make symbol entries for all import tables
484 uint32_t isec; // Index into ImportSections list
485 uint32_t SectionType; // Section type
486 TMAC_section * sectp; // Pointer to section
487 TMAC_nlist * symp0 = (TMAC_nlist*)(this->Buf() + this->SymTabOffset); // Pointer to symbol table
488 uint32_t * IndSymp = (uint32_t*)(this->Buf() + this->IndirectSymTabOffset); // Pointer to indirect symbol table
489 uint32_t iimp; // Index into import table
490 char * strtab = (char*)(this->Buf() + this->StringTabOffset); // pointer to string table
491
492 // Loop through import sections
493 for (isec = 0; isec < ImportSections.GetNumEntries(); isec++) {
494 // Pointer to section header
495 sectp = ImportSections[isec];
496 // Section type
497 SectionType = sectp->flags & MAC_SECTION_TYPE;
498 if (SectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && SectionType <= MAC_S_MOD_INIT_FUNC_POINTERS) {
499
500 // This section contains import tables
501 // Entry size in import table
502 uint32_t EntrySize = sectp->reserved2;
503 // Entry size is 4 if not specified
504 if (EntrySize == 0) EntrySize = 4;
505 // Number of entries
506 uint32_t NumEntries = (uint32_t)sectp->size / EntrySize;
507 // Index into indirect symbol table entry of first entry in import table
508 uint32_t Firsti = sectp->reserved1;
509 // Check if within range
510 if (Firsti + NumEntries > this->IndirectSymTabNumber) {
511 // This occurs when disassembling 64-bit Mach-O executable
512 // I don't know how to interpret the import table
513 err.submit(1054); continue;
514 }
515 // Loop through import table entries
516 for (iimp = 0; iimp < NumEntries; iimp++) {
517 // Address of import table entry
518 uint32_t ImportAddress = (uint32_t)sectp->addr + iimp * EntrySize;
519 // Get symbol table index from indirect symbol table
520 uint32_t symi = IndSymp[iimp + Firsti];
521 // Check index
522 if (symi == 0x80000000) {
523 // This value occurs. Maybe it means ignore?
524 continue;
525 }
526 // Check if index within symbol table
527 if (symi >= this->SymTabNumber) {
528 err.submit(1052); continue;
529 }
530 // Find name
531 uint32_t StringIndex = symp0[symi].n_strx;
532 if (StringIndex >= this->StringTabSize) {
533 err.submit(1052); continue;
534 }
535 const char * Name = strtab + StringIndex;
536 // Name of .so to import from
537 const char * DLLName = "?";
538
539 // Symbol type
540 uint32_t Type = 0;
541 switch (SectionType) {
542 case MAC_S_NON_LAZY_SYMBOL_POINTERS:
543 case MAC_S_LAZY_SYMBOL_POINTERS:
544 // pointer to symbol
545 Type = 3; break;
546 case MAC_S_SYMBOL_STUBS:
547 // jump to function
548 Type = 0x83;
549 // Make appear as direct call
550 DLLName = 0;
551 break;
552 case MAC_S_MOD_INIT_FUNC_POINTERS:
553 // function pointer?
554 Type = 0x0C; break;
555 }
556
557 // Make symbol record for disassembler
558 Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ImportAddress, 4, Type, 2, 0, Name, DLLName);
559 }
560 }
561 else if (SectionType == MAC_S_4BYTE_LITERALS) {
562 // Section contains 4-byte float constants.
563 // Make symbol
564 Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32_t)sectp->addr, 4, 0x43, 2, 0, "Float_constants");
565 }
566 else if (SectionType == MAC_S_8BYTE_LITERALS) {
567 // Section contains 8-byte double constants.
568 // Make symbol
569 Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32_t)sectp->addr, 8, 0x44, 2, 0, "Double_constants");
570 }
571 }
572 }
573
574
// Make template instances for 32 and 64 bits.
// These explicit instantiations make the compiler generate the member
// functions defined in this file for both MAC32STRUCTURES and MAC64STRUCTURES
template class CMAC2ASM<MAC32STRUCTURES>;
template class CMAC2ASM<MAC64STRUCTURES>;
578