/****************************  mac2asm.cpp   *********************************
* Author:        Agner Fog
* Date created:  2007-05-24
* Last modified: 2008-05-12
* Project:       objconv
* Module:        mac2asm.cpp
* Description:
* Module for disassembling Mach-O files
*
* Copyright 2007-2008 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
#include "stdafx.h"
13 
14 // Constructor
15 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
CMAC2ASM()16 CMAC2ASM<MACSTRUCTURES>::CMAC2ASM() {
17 }
18 
19 // Convert
20 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
Convert()21 void CMAC2ASM<MACSTRUCTURES>::Convert() {
22    // Do the conversion
23 
24    // Check cpu type
25    switch (this->FileHeader.cputype) {
26    case MAC_CPU_TYPE_I386:
27       this->WordSize = 32;  break;
28 
29    case MAC_CPU_TYPE_X86_64:
30       this->WordSize = 64;  break;
31 
32    default:
33       // Wrong type
34       err.submit(2011, "");  return;
35    }
36 
37    // check object/executable file type
38    uint32_t ExeType;                     // File type: 0 = object, 1 = position independent shared object, 2 = executable
39 
40    switch (this->FileHeader.filetype) {
41    case MAC_OBJECT:   // Relocatable object file
42       ExeType = 0;  break;
43 
44    case MAC_FVMLIB:   // fixed VM shared library file
45    case MAC_DYLIB:    // dynamicly bound shared library file
46    case MAC_BUNDLE:   // part of universal binary
47       ExeType = 1;  break;
48 
49    case MAC_EXECUTE:  // demand paged executable file
50    case MAC_CORE:     // core file
51    case MAC_PRELOAD:  // preloaded executable file
52       ExeType = 2;  break;
53 
54    default:  // Other types
55       err.submit(2011, "");  return;
56    }
57 
58    // Tell disassembler
59    // Disasm.Init(ExeType, this->ImageBase);
60    Disasm.Init(ExeType, 0);
61 
62    // Make Sections list and relocations list
63    MakeSectionList();
64 
65    // Make Symbols list in Disasm
66    MakeSymbolList();
67 
68    // Make relocations list in Disasm
69    MakeRelocations();
70 
71    // Make symbol entries for imported symbols
72    MakeImports();
73 
74    Disasm.Go();                                  // Disassemble
75 
76    *this << Disasm.OutFile;                      // Take over output file from Disasm
77 }
78 
79 // MakeSectionList
80 
81 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeSectionList()82 void CMAC2ASM<MACSTRUCTURES>::MakeSectionList() {
83    // Make Sections list and Relocations list in Disasm
84 
85    uint32_t icmd;                        // Command index
86    int32_t  isec1;                       // Section index within segment
87    int32_t  isec2 = 0;                   // Section index global
88    int32_t  nsect;                       // Number of sections in segment
89    uint32_t cmd;                         // Load command
90    uint32_t cmdsize;                     // Command size
91 
92    StringBuffer.Push(0, 1);            // Initialize string buffer
93 
94    // Pointer to current position
95    uint8_t * currentp = (uint8_t*)(this->Buf() + sizeof(TMAC_header));
96 
97    // Loop through file commands
98    for (icmd = 1; icmd <= this->FileHeader.ncmds; icmd++) {
99       cmd     = ((MAC_load_command*)currentp) -> cmd;
100       cmdsize = ((MAC_load_command*)currentp) -> cmdsize;
101 
102       if (cmd == MAC_LC_SEGMENT || cmd == MAC_LC_SEGMENT_64) {
103          // This is a segment command
104          if ((this->WordSize == 64) ^ (cmd == MAC_LC_SEGMENT_64)) {
105             // Inconsistent word size
106             err.submit(2320);  break;
107          }
108 
109          // Number of sections in segment
110          nsect   = ((TMAC_segment_command*)currentp) -> nsects;
111 
112          // Find first section header
113          TMAC_section * sectp = (TMAC_section*)(currentp + sizeof(TMAC_segment_command));
114 
115          // Loop through section headers
116          for (isec1 = 1; isec1 <= nsect; isec1++, sectp++) {
117 
118             if (sectp->offset >= this->GetDataSize()) {
119                // points outside file
120                err.submit(2035);  break;
121             }
122 
123             // Get section properties
124             isec2++;                   // Section number
125             uint32_t MacSectionType = sectp->flags & MAC_SECTION_TYPE;
126             uint8_t * Buffer = (uint8_t*)(this->Buf()) + sectp->offset;
127             uint32_t TotalSize = (uint32_t)sectp->size;
128             uint32_t InitSize = TotalSize;
129             if (MacSectionType == MAC_S_ZEROFILL) InitSize = 0;
130             uint32_t SectionAddress = (uint32_t)sectp->addr;
131             uint32_t Align = sectp->align;
132 
133             // Get section type
134             // 0 = unknown, 1 = code, 2 = data, 3 = uninitialized data, 4 = constant data
135             uint32_t Type = 0;
136             if (sectp->flags & (MAC_S_ATTR_PURE_INSTRUCTIONS | MAC_S_ATTR_SOME_INSTRUCTIONS)) {
137                Type = 1; // code
138             }
139             else if (MacSectionType == MAC_S_ZEROFILL) {
140                Type = 3; // uninitialized data
141             }
142             else {
143                Type = 2; // data or anything else
144             }
145 
146             // Make section name by combining segment name and section name
147             uint32_t NameOffset = StringBuffer.Push(sectp->segname, (uint32_t)strlen(sectp->segname)); // Segment name
148             StringBuffer.Push(".", 1);  // Separate by dot
149             StringBuffer.PushString(sectp->sectname);  // Section name
150             char * Name = (char*)StringBuffer.Buf() + NameOffset;
151 
152             // Save section record
153             Disasm.AddSection(Buffer, InitSize, TotalSize, SectionAddress, Type, Align, this->WordSize, Name);
154 
155             // Save information about relocation list for this section
156             if (sectp->nreloc) {
157                MAC_SECT_WITH_RELOC RelList = {isec2, sectp->offset, sectp->nreloc, sectp->reloff};
158                RelocationQueue.Push(RelList);
159             }
160 
161             // Find import tables
162             if (MacSectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && MacSectionType <= MAC_S_LAZY_SYMBOL_POINTERS /*?*/) {
163                // This is an import table
164                ImportSections.Push(sectp);
165             }
166             // Find literals sections
167             if (MacSectionType == MAC_S_4BYTE_LITERALS || MacSectionType == MAC_S_8BYTE_LITERALS) {
168                // This is a literals section
169                ImportSections.Push(sectp);
170             }
171          }
172       }
173       currentp += cmdsize;
174    }
175 }
176 
177 // MakeRelocations
178 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeRelocations()179 void CMAC2ASM<MACSTRUCTURES>::MakeRelocations() {
180    // Make relocations for object and executable files
181    uint32_t iqq;                         // Index into RelocationQueue = table of relocation tables
182    uint32_t irel;                        // Index into relocation table
183    int32_t  Section;                     // Section index
184    uint32_t SectOffset;                  // File offset of section binary data
185    uint32_t NumReloc;                    // Number of relocations records for this section
186    uint32_t ReltabOffset;                // File offset of relocation table for this section
187    uint32_t SourceOffset;                // Section-relative offset of relocation source
188    uint32_t SourceSize;                  // Size of relocation source
189    int32_t  Inline = 0;                  // Inline addend at relocation source
190    uint32_t TargetAddress;               // Base-relative address of relocation target
191    uint32_t TargetSymbol;                // Symbol index of target
192    //int32_t  TargetSection;             // Target section
193    int32_t  Addend;                      // Offset to add to target
194    uint32_t ReferenceAddress;            // Base-relative address of reference point
195    uint32_t ReferenceSymbol;             // Symbol index of reference point
196    uint32_t R_Type;                      // Relocation type in Mach-O record
197    uint32_t R_Type2;                     // Relocation type of second entry of a pair
198    uint32_t R_PCRel;                     // Relocation is self-relative
199    uint32_t RelType = 0;                 // Relocation type translated to disasm record
200 
201    // Loop through RelocationQueue. There is one entry for each relocation table
202    for (iqq = 0; iqq < RelocationQueue.GetNumEntries(); iqq++) {
203       Section = RelocationQueue[iqq].Section;              // Section index
204       SectOffset = RelocationQueue[iqq].SectOffset;        // File offset of section binary data
205       NumReloc = RelocationQueue[iqq].NumReloc;            // Number of relocations records for this section
206       ReltabOffset = RelocationQueue[iqq].ReltabOffset;    // File offset of relocation table for this section
207 
208       if (NumReloc == 0) continue;
209 
210       if (ReltabOffset == 0 || ReltabOffset >= this->GetDataSize() || ReltabOffset + NumReloc*sizeof(MAC_relocation_info) >= this->GetDataSize()) {
211          // Pointer out of range
212          err.submit(2035);  return;
213       }
214 
215       // pointer to relocation info
216       union {
217          MAC_relocation_info * r;
218          MAC_scattered_relocation_info * s;
219          int8_t * b;
220       } relp;
221       // Point to first relocation entry
222       relp.b = this->Buf() + ReltabOffset;
223 
224       // Loop through relocation table
225       for (irel = 0; irel < NumReloc; irel++, relp.r++) {
226 
227          // Set defaults
228          ReferenceAddress = ReferenceSymbol = TargetSymbol = Addend = 0;
229 
230          if (relp.s->r_scattered) {
231             // scattered relocation entry
232             SourceOffset  = relp.s->r_address;
233             SourceSize    = 1 << relp.s->r_length;
234             R_PCRel       = relp.s->r_pcrel;
235             R_Type        = relp.s->r_type;
236             TargetAddress = relp.s->r_value;
237             TargetSymbol  = 0;
238          }
239          else {
240             // non-scattered relocation entry
241             SourceOffset  = relp.r->r_address;
242             SourceSize    = 1 << relp.r->r_length;
243             R_PCRel       = relp.r->r_pcrel;
244             R_Type        = relp.r->r_type;
245             if (relp.r->r_extern) {
246                TargetSymbol = relp.r->r_symbolnum + 1;
247             }
248             else {
249                //TargetSection = relp.r->r_symbolnum;
250             }
251             TargetAddress = 0;
252          }
253 
254          if (this->WordSize == 32 && (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF)) {
255             // This is the first of a pair of relocation entries.
256             // Get second entry containing reference point
257             irel++;  relp.r++;
258             if (irel >= NumReloc) {err.submit(2050); break;}
259 
260             if (relp.s->r_scattered) {
261                // scattered relocation entry
262                R_Type2          = relp.s->r_type;
263                ReferenceAddress = relp.s->r_value;
264                ReferenceSymbol  = 0;
265             }
266             else {
267                // non-scattered relocation entry
268                ReferenceSymbol  = relp.r->r_symbolnum + 1;
269                R_Type2          = relp.r->r_type;
270                ReferenceAddress = 0;
271             }
272             if (R_Type2 != MAC32_RELOC_PAIR) {err.submit(2050); break;}
273 
274             if (ReferenceSymbol == 0) {
275                // Reference point has no symbol index. Make one
276                ReferenceSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ReferenceAddress, 0, 0, 2, 0, 0);
277             }
278          }
279 
280          if (this->WordSize == 64 && R_Type == MAC64_RELOC_SUBTRACTOR) {
281             // This is the first of a pair of relocation entries.
282             // The first entry contains reference point to subtract
283             irel++;  relp.r++;
284             if (irel >= NumReloc || relp.s->r_scattered || relp.r->r_type != MAC64_RELOC_UNSIGNED) {
285                err.submit(2050); break;
286             }
287             ReferenceSymbol = TargetSymbol;
288             R_PCRel       = relp.r->r_pcrel;
289             if (relp.r->r_extern) {
290                TargetSymbol = relp.r->r_symbolnum + 1;
291             }
292             else {
293                //TargetSection = relp.r->r_symbolnum;
294             }
295             TargetAddress = 0;
296          }
297 
298          // Get inline addend or address
299          if (SectOffset + SourceOffset < this->GetDataSize()) {
300             switch (SourceSize) {
301             case 1:
302                Inline = CMemoryBuffer::Get<int8_t>(SectOffset+SourceOffset);
303                // (this->Get<int8_t> doesn't work on Gnu compiler 4.0.1)
304                break;
305             case 2:
306                Inline = CMemoryBuffer::Get<int16_t>(SectOffset+SourceOffset);
307                break;
308             case 4: case 8:
309                Inline = CMemoryBuffer::Get<int32_t>(SectOffset+SourceOffset);
310                break;
311             default:
312                Inline = 0;
313             }
314          }
315 
316          if (this->WordSize == 32) {
317             // Calculate target address and addend, 32 bit system
318             if (R_Type == MAC32_RELOC_SECTDIFF || R_Type == MAC32_RELOC_LOCAL_SECTDIFF) {
319                // Relative to reference point
320                // Compensate for inline value = TargetAddress - ReferenceAddress;
321                Addend = ReferenceAddress - TargetAddress;
322             }
323             else if (R_PCRel) {
324                // Self-relative
325                TargetAddress += Inline + SourceOffset + SourceSize;
326                Addend = -4 - Inline;
327             }
328             else {
329                // Direct
330                TargetAddress += Inline;
331                Addend = -Inline;
332             }
333          }
334 
335          if (TargetSymbol == 0) {
336             // Target has no symbol index. Make one
337             TargetSymbol = Disasm.AddSymbol(ASM_SEGMENT_IMGREL, TargetAddress, 0, 0, 2, 0, 0);
338          }
339 
340          // Find type
341          if (this->WordSize == 32) {
342             switch (R_Type) {
343             case MAC32_RELOC_VANILLA:
344                // Direct or self-relative
345                RelType = R_PCRel ? 2 : 1;
346                break;
347 
348             case MAC32_RELOC_SECTDIFF: case MAC32_RELOC_LOCAL_SECTDIFF:
349                // Relative to reference point
350                RelType = 0x10;
351                break;
352 
353             case MAC32_RELOC_PB_LA_PTR:
354                // Lazy pointer
355                RelType = 0x41; //??
356                break;
357 
358             default:
359                // Unknown type
360                err.submit(2030, R_Type);
361                break;
362             }
363          }
364          else { // 64-bit relocation types
365             switch (R_Type) {
366             case MAC64_RELOC_UNSIGNED:
367                // Absolute address
368                RelType = 1;
369                break;
370             case MAC64_RELOC_BRANCH:
371                // Signed 32-bit displacement with implicit -4 addend
372             case MAC64_RELOC_SIGNED:
373                // Signed 32-bit displacement with implicit -4 addend
374             case MAC64_RELOC_SIGNED_1:
375                // Signed 32-bit displacement with implicit -4 addend and explicit -1 addend
376             case MAC64_RELOC_SIGNED_2:
377                // Signed 32-bit displacement with implicit -4 addend and explicit -2 addend
378             case MAC64_RELOC_SIGNED_4:
379                // Signed 32-bit displacement with implicit -4 addend and explicit -4 addend
380                RelType = 2;  Addend -= 4;
381                break;
382             case MAC64_RELOC_GOT:
383                // Absolute or relative reference to GOT?
384                // RelType = 0x1001; break;
385             case MAC64_RELOC_GOT_LOAD:
386                // Signed 32-bit displacement to GOT
387                RelType = 0x1002;  Addend -= 4;
388                break;
389             case MAC64_RELOC_SUBTRACTOR:
390                // 32 or 64 bit relative to arbitrary reference point
391                RelType = 0x10;
392                break;
393             default:
394                // Unknown type
395                err.submit(2030, R_Type);
396                break;
397             }
398          }
399 
400          // Make relocation record
401          Disasm.AddRelocation(Section, SourceOffset, Addend,
402             RelType, SourceSize, TargetSymbol, ReferenceSymbol);
403       }
404    }
405 }
406 
407 // MakeSymbolList
408 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeSymbolList()409 void CMAC2ASM<MACSTRUCTURES>::MakeSymbolList() {
410    // Make Symbols list in Disasm
411    uint32_t symi;                        // Symbol index, 0-based
412    uint32_t symn = 0;                    // Symbol number, 1-based
413    char * Name;                        // Symbol name
414    int32_t  Section;                     // Section number (1-based). 0 = external, ASM_SEGMENT_ABSOLUTE = absolute, ASM_SEGMENT_IMGREL = image-relative
415    uint32_t Offset;                      // Offset into section. (Value for absolute symbol)
416    uint32_t Type;                        // Symbol type. Use values listed above for SOpcodeDef operands. 0 = unknown type
417    uint32_t Scope;                       // 1 = function local, 2 = file local, 4 = public, 8 = weak public, 0x10 = communal, 0x20 = external
418 
419    // pointer to string table
420    char * strtab = (char*)(this->Buf() + this->StringTabOffset);
421 
422    // loop through symbol table
423    TMAC_nlist * symp = (TMAC_nlist*)(this->Buf() + this->SymTabOffset);
424    for (symi = 0; symi < this->SymTabNumber; symi++, symp++) {
425 
426       if (symp->n_type & MAC_N_STAB) {
427          // Debug symbol. Ignore
428          continue;
429       }
430 
431       if (symp->n_strx < this->StringTabSize) {
432          // Normal symbol
433          Section = symp->n_sect;
434          Offset  = (uint32_t)symp->n_value;
435          Name    = strtab + symp->n_strx;
436          symn    = symi + 1;           // Convert 0-based to 1-based index
437 
438          // Get scope
439          if (symi < this->iextdefsym) {
440             // Local
441             Scope = 2;
442          }
443          else if (Section && (symp->n_type & MAC_N_TYPE) != MAC_N_UNDF) {
444             // Public
445             Scope = 4;
446          }
447          else {
448             // External
449             Scope = 0x20;
450          }
451          // Check if absolute
452          if ((symp->n_type & MAC_N_TYPE) == MAC_N_ABS) {
453             // Absolute
454             Section = ASM_SEGMENT_ABSOLUTE;  Scope = 4;
455          }
456          // Check if weak/communal
457          if (symp->n_type & MAC_N_PEXT) {
458             // Communal?
459             Scope = 0x10;
460          }
461          else if (symp->n_desc & MAC_N_WEAK_DEF) {
462             // Weak public
463             Scope = 8;
464          }
465          else if (symp->n_desc & MAC_N_WEAK_REF) {
466             // Weak external (not supported by disassembler)
467             Scope = 0x20;
468          }
469          // Get type
470          Type = 0;
471 
472          // Offset is always based, not section-relative
473          if (Section > 0) Section = ASM_SEGMENT_IMGREL;
474 
475          // Add symbol to diassembler
476          Disasm.AddSymbol(Section, Offset, 0, Type, Scope, symn, Name);
477       }
478    }
479 }
480 
481 template <class TMAC_header, class TMAC_segment_command, class TMAC_section, class TMAC_nlist, class MInt>
MakeImports()482 void CMAC2ASM<MACSTRUCTURES>::MakeImports() {
483    // Make symbol entries for all import tables
484    uint32_t isec;                        // Index into ImportSections list
485    uint32_t SectionType;                 // Section type
486    TMAC_section * sectp;                // Pointer to section
487    TMAC_nlist * symp0 = (TMAC_nlist*)(this->Buf() + this->SymTabOffset); // Pointer to symbol table
488    uint32_t * IndSymp = (uint32_t*)(this->Buf() + this->IndirectSymTabOffset); // Pointer to indirect symbol table
489    uint32_t iimp;                        // Index into import table
490    char * strtab = (char*)(this->Buf() + this->StringTabOffset);    // pointer to string table
491 
492    // Loop through import sections
493    for (isec = 0; isec < ImportSections.GetNumEntries(); isec++) {
494       // Pointer to section header
495       sectp = ImportSections[isec];
496       // Section type
497       SectionType = sectp->flags & MAC_SECTION_TYPE;
498       if (SectionType >= MAC_S_NON_LAZY_SYMBOL_POINTERS && SectionType <= MAC_S_MOD_INIT_FUNC_POINTERS) {
499 
500          // This section contains import tables
501          // Entry size in import table
502          uint32_t EntrySize = sectp->reserved2;
503          // Entry size is 4 if not specified
504          if (EntrySize == 0) EntrySize = 4;
505          // Number of entries
506          uint32_t NumEntries = (uint32_t)sectp->size / EntrySize;
507          // Index into indirect symbol table entry of first entry in import table
508          uint32_t Firsti = sectp->reserved1;
509          // Check if within range
510          if (Firsti + NumEntries > this->IndirectSymTabNumber) {
511             // This occurs when disassembling 64-bit Mach-O executable
512             // I don't know how to interpret the import table
513             err.submit(1054);  continue;
514          }
515          // Loop through import table entries
516          for (iimp = 0; iimp < NumEntries; iimp++) {
517             // Address of import table entry
518             uint32_t ImportAddress = (uint32_t)sectp->addr + iimp * EntrySize;
519             // Get symbol table index from indirect symbol table
520             uint32_t symi = IndSymp[iimp + Firsti];
521             // Check index
522             if (symi == 0x80000000) {
523                // This value occurs. Maybe it means ignore?
524                continue;
525             }
526             // Check if index within symbol table
527             if (symi >= this->SymTabNumber) {
528                err.submit(1052); continue;
529             }
530             // Find name
531             uint32_t StringIndex = symp0[symi].n_strx;
532             if (StringIndex >= this->StringTabSize) {
533                err.submit(1052); continue;
534             }
535             const char * Name = strtab + StringIndex;
536             // Name of .so to import from
537             const char * DLLName = "?";
538 
539             // Symbol type
540             uint32_t Type = 0;
541             switch (SectionType) {
542          case MAC_S_NON_LAZY_SYMBOL_POINTERS:
543          case MAC_S_LAZY_SYMBOL_POINTERS:
544             // pointer to symbol
545             Type = 3;  break;
546          case MAC_S_SYMBOL_STUBS:
547             // jump to function
548             Type = 0x83;
549             // Make appear as direct call
550             DLLName = 0;
551             break;
552          case MAC_S_MOD_INIT_FUNC_POINTERS:
553             // function pointer?
554             Type = 0x0C;  break;
555             }
556 
557             // Make symbol record for disassembler
558             Disasm.AddSymbol(ASM_SEGMENT_IMGREL, ImportAddress, 4, Type, 2, 0, Name, DLLName);
559          }
560       }
561       else if (SectionType == MAC_S_4BYTE_LITERALS) {
562          // Section contains 4-byte float constants.
563          // Make symbol
564          Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32_t)sectp->addr, 4, 0x43, 2, 0, "Float_constants");
565       }
566       else if (SectionType == MAC_S_8BYTE_LITERALS) {
567          // Section contains 8-byte double constants.
568          // Make symbol
569          Disasm.AddSymbol(ASM_SEGMENT_IMGREL, (uint32_t)sectp->addr, 8, 0x44, 2, 0, "Double_constants");
570       }
571    }
572 }
573 
574 
575 // Make template instances for 32 and 64 bits
576 template class CMAC2ASM<MAC32STRUCTURES>;
577 template class CMAC2ASM<MAC64STRUCTURES>;
578