xref: /openbsd/gnu/llvm/lldb/examples/python/mach_o.py (revision f6aab3d8)
1#!/usr/bin/env python
2
3import cmd
4import dict_utils
5import file_extract
6import optparse
7import re
8import struct
9import string
10import io
11import sys
12import uuid
13
14# Mach header "magic" constants
15MH_MAGIC = 0xfeedface
16MH_CIGAM = 0xcefaedfe
17MH_MAGIC_64 = 0xfeedfacf
18MH_CIGAM_64 = 0xcffaedfe
19FAT_MAGIC = 0xcafebabe
20FAT_CIGAM = 0xbebafeca
21
# Mach header "filetype" constants
23MH_OBJECT = 0x00000001
24MH_EXECUTE = 0x00000002
25MH_FVMLIB = 0x00000003
26MH_CORE = 0x00000004
27MH_PRELOAD = 0x00000005
28MH_DYLIB = 0x00000006
29MH_DYLINKER = 0x00000007
30MH_BUNDLE = 0x00000008
31MH_DYLIB_STUB = 0x00000009
32MH_DSYM = 0x0000000a
33MH_KEXT_BUNDLE = 0x0000000b
34
# Mach header "flag" constant bits
36MH_NOUNDEFS = 0x00000001
37MH_INCRLINK = 0x00000002
38MH_DYLDLINK = 0x00000004
39MH_BINDATLOAD = 0x00000008
40MH_PREBOUND = 0x00000010
41MH_SPLIT_SEGS = 0x00000020
42MH_LAZY_INIT = 0x00000040
43MH_TWOLEVEL = 0x00000080
44MH_FORCE_FLAT = 0x00000100
45MH_NOMULTIDEFS = 0x00000200
46MH_NOFIXPREBINDING = 0x00000400
47MH_PREBINDABLE = 0x00000800
48MH_ALLMODSBOUND = 0x00001000
49MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000
50MH_CANONICAL = 0x00004000
51MH_WEAK_DEFINES = 0x00008000
52MH_BINDS_TO_WEAK = 0x00010000
53MH_ALLOW_STACK_EXECUTION = 0x00020000
54MH_ROOT_SAFE = 0x00040000
55MH_SETUID_SAFE = 0x00080000
56MH_NO_REEXPORTED_DYLIBS = 0x00100000
57MH_PIE = 0x00200000
58MH_DEAD_STRIPPABLE_DYLIB = 0x00400000
59MH_HAS_TLV_DESCRIPTORS = 0x00800000
60MH_NO_HEAP_EXECUTION = 0x01000000
61
62# Mach load command constants
63LC_REQ_DYLD = 0x80000000
64LC_SEGMENT = 0x00000001
65LC_SYMTAB = 0x00000002
66LC_SYMSEG = 0x00000003
67LC_THREAD = 0x00000004
68LC_UNIXTHREAD = 0x00000005
69LC_LOADFVMLIB = 0x00000006
70LC_IDFVMLIB = 0x00000007
71LC_IDENT = 0x00000008
72LC_FVMFILE = 0x00000009
73LC_PREPAGE = 0x0000000a
74LC_DYSYMTAB = 0x0000000b
75LC_LOAD_DYLIB = 0x0000000c
76LC_ID_DYLIB = 0x0000000d
77LC_LOAD_DYLINKER = 0x0000000e
78LC_ID_DYLINKER = 0x0000000f
79LC_PREBOUND_DYLIB = 0x00000010
80LC_ROUTINES = 0x00000011
81LC_SUB_FRAMEWORK = 0x00000012
82LC_SUB_UMBRELLA = 0x00000013
83LC_SUB_CLIENT = 0x00000014
84LC_SUB_LIBRARY = 0x00000015
85LC_TWOLEVEL_HINTS = 0x00000016
86LC_PREBIND_CKSUM = 0x00000017
87LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD
88LC_SEGMENT_64 = 0x00000019
89LC_ROUTINES_64 = 0x0000001a
90LC_UUID = 0x0000001b
91LC_RPATH = 0x0000001c | LC_REQ_DYLD
92LC_CODE_SIGNATURE = 0x0000001d
93LC_SEGMENT_SPLIT_INFO = 0x0000001e
94LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD
95LC_LAZY_LOAD_DYLIB = 0x00000020
96LC_ENCRYPTION_INFO = 0x00000021
97LC_DYLD_INFO = 0x00000022
98LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD
99LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD
100LC_VERSION_MIN_MACOSX = 0x00000024
101LC_VERSION_MIN_IPHONEOS = 0x00000025
102LC_FUNCTION_STARTS = 0x00000026
103LC_DYLD_ENVIRONMENT = 0x00000027
104
105# Mach CPU constants
106CPU_ARCH_MASK = 0xff000000
107CPU_ARCH_ABI64 = 0x01000000
108CPU_TYPE_ANY = 0xffffffff
109CPU_TYPE_VAX = 1
110CPU_TYPE_MC680x0 = 6
111CPU_TYPE_I386 = 7
112CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64
113CPU_TYPE_MIPS = 8
114CPU_TYPE_MC98000 = 10
115CPU_TYPE_HPPA = 11
116CPU_TYPE_ARM = 12
117CPU_TYPE_MC88000 = 13
118CPU_TYPE_SPARC = 14
119CPU_TYPE_I860 = 15
120CPU_TYPE_ALPHA = 16
121CPU_TYPE_POWERPC = 18
122CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
123
124# VM protection constants
125VM_PROT_READ = 1
126VM_PROT_WRITE = 2
127VM_PROT_EXECUTE = 4
128
# Bit masks for the nlist "Mach.NList.type" field
130N_STAB = 0xe0
131N_PEXT = 0x10
132N_TYPE = 0x0e
133N_EXT = 0x01
134
135# Values for nlist N_TYPE bits of the "Mach.NList.type" field.
136N_UNDF = 0x0
137N_ABS = 0x2
138N_SECT = 0xe
139N_PBUD = 0xc
140N_INDR = 0xa
141
142# Section indexes for the "Mach.NList.sect_idx" fields
143NO_SECT = 0
144MAX_SECT = 255
145
146# Stab defines
147N_GSYM = 0x20
148N_FNAME = 0x22
149N_FUN = 0x24
150N_STSYM = 0x26
151N_LCSYM = 0x28
152N_BNSYM = 0x2e
153N_OPT = 0x3c
154N_RSYM = 0x40
155N_SLINE = 0x44
156N_ENSYM = 0x4e
157N_SSYM = 0x60
158N_SO = 0x64
159N_OSO = 0x66
160N_LSYM = 0x80
161N_BINCL = 0x82
162N_SOL = 0x84
163N_PARAMS = 0x86
164N_VERSION = 0x88
165N_OLEVEL = 0x8A
166N_PSYM = 0xa0
167N_EINCL = 0xa2
168N_ENTRY = 0xa4
169N_LBRAC = 0xc0
170N_EXCL = 0xc2
171N_RBRAC = 0xe0
172N_BCOMM = 0xe2
173N_ECOMM = 0xe4
174N_ECOML = 0xe8
175N_LENG = 0xfe
176
177vm_prot_names = ['---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx']
178
179
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    """Print a hex + ASCII dump of "data" to stdout.

    base_addr     -- address shown for the first byte of "data".
    data          -- bytes-like object to dump; a str is accepted and is
                     converted one character per byte (latin-1).
    hex_bytes_len -- number of hex digits (two per byte) to dump, or -1 to
                     dump everything; values past the end of "data" are
                     clamped.
    num_per_line  -- number of bytes shown on each output line.

    Each line is "0xADDRESS: hh hh ... <ascii>", where non-printable bytes
    are shown as '.' in the ASCII column. A blank line follows the dump.
    """
    if isinstance(data, str):
        # Python 2 callers passed byte strings; map each character to a byte.
        data = data.encode('latin-1')
    raw = bytearray(data)
    if hex_bytes_len == -1:
        byte_count = len(raw)
    else:
        # hex_bytes_len counts hex digits; round up so a trailing odd digit
        # still selects its byte, and never read past the data.
        byte_count = min((hex_bytes_len + 1) // 2, len(raw))
    ascii_str = ''
    for index in range(byte_count):
        if index % num_per_line == 0:
            if index > 0:
                # Finish the previous line with its ASCII column.
                print(' %s' % (ascii_str))
                ascii_str = ''
            # The label is the address of this byte, so advance by bytes,
            # not by hex digits.
            print('0x%8.8x:' % (base_addr + index), end=' ')
        int_byte = raw[index]
        print('%2.2x' % int_byte, end=' ')
        if 32 <= int_byte < 127:
            ascii_str += chr(int_byte)
        else:
            ascii_str += '.'
    if ascii_str:
        # Pad a short final line so the ASCII column lines up.
        remainder = byte_count % num_per_line
        padding = num_per_line - remainder if remainder else 0
        print('%*s%s' % (padding * 3 + 1, '', ascii_str))
    print()
209
210
class TerminalColors:
    '''Simple terminal colors class.

    Every method returns an ANSI escape sequence as a string, or an empty
    string when the object was created with "enabled" set to a false value,
    so the results can always be concatenated into output text.'''

    # Offsets added to 30 (foreground) or 40 (background) to select a color.
    _BLACK = 0
    _RED = 1
    _GREEN = 2
    _YELLOW = 3
    _BLUE = 4
    _MAGENTA = 5
    _CYAN = 6
    _WHITE = 7
    _DEFAULT = 9

    def __init__(self, enabled=True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, code):
        '''Return the escape sequence for numeric "code", or '' if disabled.'''
        if self.enabled:
            return "\x1b[%dm" % code
        return ''

    def _toggle(self, on, on_code, off_code):
        '''Return the sequence for "on_code" if "on" tests True, else "off_code".'''
        return self._sgr(on_code if on else off_code)

    def _color(self, fg, index):
        '''Return the foreground (30 + index) or background (40 + index) sequence.'''
        return self._sgr((30 if fg else 40) + index)

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._sgr(0)

    def bold(self, on=True):
        '''Enable or disable bold depending on the "on" parameter.'''
        return self._toggle(on, 1, 22)

    def italics(self, on=True):
        '''Enable or disable italics depending on the "on" parameter.'''
        return self._toggle(on, 3, 23)

    def underline(self, on=True):
        '''Enable or disable underline depending on the "on" parameter.'''
        return self._toggle(on, 4, 24)

    def inverse(self, on=True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        return self._toggle(on, 7, 27)

    def strike(self, on=True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        return self._toggle(on, 9, 29)

    def black(self, fg=True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._BLACK)

    def red(self, fg=True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._RED)

    def green(self, fg=True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._GREEN)

    def yellow(self, fg=True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # Foreground is 33 and background is 43, matching every other color.
        return self._color(fg, self._YELLOW)

    def blue(self, fg=True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._BLUE)

    def magenta(self, fg=True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._MAGENTA)

    def cyan(self, fg=True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._CYAN)

    def white(self, fg=True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._WHITE)

    def default(self, fg=True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(fg, self._DEFAULT)
359
360
def swap_unpack_char():
    """Return the struct byte-order prefix for the non-native endianness.

    The result is '<' (little endian) on a big-endian host and '>' (big
    endian) on a little-endian host.
    """
    # A native-order 16-bit 1 starts with a zero byte only on a big-endian
    # host. struct.pack() returns bytes, so compare against a bytes literal;
    # a str prefix raises TypeError on Python 3.
    if struct.pack('H', 1).startswith(b"\x00"):
        return '<'
    return '>'
366
367
def dump_hex_bytes(addr, s, bytes_per_line=16):
    """Print the bytes of "s" in upper-case hex, "bytes_per_line" per line.

    addr           -- address printed at the start of the first line; each
                      following line shows addr plus its byte offset.
    s              -- bytes-like object or str (one character per byte).
    bytes_per_line -- number of bytes printed on each line.

    A final line is always printed, so empty input prints an empty line.
    """
    line = ''
    for i, ch in enumerate(s):
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        # Iterating bytes yields ints on Python 3 and 1-character strings
        # on Python 2 (or for str input); accept both.
        value = ch if isinstance(ch, int) else ord(ch)
        line += "%02X " % value
    print(line)
379
380
def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    """Print a hex dump of "a" with bytes that differ from "b" shown in red.

    addr           -- address printed at the start of the first line; each
                      following line shows addr plus its byte offset.
    a, b           -- bytes-like objects or strings to compare.
    bytes_per_line -- number of bytes printed on each line.

    The dump covers the longer of the two inputs. At each offset the byte
    from "a" is printed; where "a" has ended, the byte from "b" is printed.
    An offset present in only one input counts as a mismatch.
    """
    line = ''
    a_len = len(a)
    b_len = len(b)
    tty_colors = TerminalColors(True)
    for i in range(max(a_len, b_len)):
        ch_a = a[i] if i < a_len else None
        ch_b = b[i] if i < b_len else None
        # Test against None rather than truthiness: a zero byte from "a"
        # is an int 0 on Python 3 and must not be replaced by b's byte.
        ch = ch_a if ch_a is not None else ch_b
        mismatch = ch_a != ch_b
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        if mismatch:
            line += tty_colors.red()
        # Indexing bytes yields ints on Python 3 and 1-character strings on
        # Python 2 (or for str input); accept both.
        value = ch if isinstance(ch, int) else ord(ch)
        line += "%02X " % value
        if mismatch:
            line += tty_colors.default()

    print(line)
417
418
419class Mach:
420    """Class that does everything mach-o related"""
421
422    class Arch:
423        """Class that implements mach-o architectures"""
424
425        def __init__(self, c=0, s=0):
426            self.cpu = c
427            self.sub = s
428
429        def set_cpu_type(self, c):
430            self.cpu = c
431
432        def set_cpu_subtype(self, s):
433            self.sub = s
434
435        def set_arch(self, c, s):
436            self.cpu = c
437            self.sub = s
438
439        def is_64_bit(self):
440            return (self.cpu & CPU_ARCH_ABI64) != 0
441
442        cpu_infos = [
443            ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY],
444            ["arm", CPU_TYPE_ARM, 0],
445            ["armv4", CPU_TYPE_ARM, 5],
446            ["armv6", CPU_TYPE_ARM, 6],
447            ["armv5", CPU_TYPE_ARM, 7],
448            ["xscale", CPU_TYPE_ARM, 8],
449            ["armv7", CPU_TYPE_ARM, 9],
450            ["armv7f", CPU_TYPE_ARM, 10],
451            ["armv7s", CPU_TYPE_ARM, 11],
452            ["armv7k", CPU_TYPE_ARM, 12],
453            ["armv7m", CPU_TYPE_ARM, 15],
454            ["armv7em", CPU_TYPE_ARM, 16],
455            ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY],
456            ["ppc", CPU_TYPE_POWERPC, 0],
457            ["ppc601", CPU_TYPE_POWERPC, 1],
458            ["ppc602", CPU_TYPE_POWERPC, 2],
459            ["ppc603", CPU_TYPE_POWERPC, 3],
460            ["ppc603e", CPU_TYPE_POWERPC, 4],
461            ["ppc603ev", CPU_TYPE_POWERPC, 5],
462            ["ppc604", CPU_TYPE_POWERPC, 6],
463            ["ppc604e", CPU_TYPE_POWERPC, 7],
464            ["ppc620", CPU_TYPE_POWERPC, 8],
465            ["ppc750", CPU_TYPE_POWERPC, 9],
466            ["ppc7400", CPU_TYPE_POWERPC, 10],
467            ["ppc7450", CPU_TYPE_POWERPC, 11],
468            ["ppc970", CPU_TYPE_POWERPC, 100],
469            ["ppc64", CPU_TYPE_POWERPC64, 0],
470            ["ppc970-64", CPU_TYPE_POWERPC64, 100],
471            ["i386", CPU_TYPE_I386, 3],
472            ["i486", CPU_TYPE_I386, 4],
473            ["i486sx", CPU_TYPE_I386, 0x84],
474            ["i386", CPU_TYPE_I386, CPU_TYPE_ANY],
475            ["x86_64", CPU_TYPE_X86_64, 3],
476            ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY],
477        ]
478
479        def __str__(self):
480            for info in self.cpu_infos:
481                if self.cpu == info[1] and (self.sub & 0x00ffffff) == info[2]:
482                    return info[0]
483            return "{0}.{1}".format(self.cpu, self.sub)
484
485    class Magic(dict_utils.Enum):
486
487        enum = {
488            'MH_MAGIC': MH_MAGIC,
489            'MH_CIGAM': MH_CIGAM,
490            'MH_MAGIC_64': MH_MAGIC_64,
491            'MH_CIGAM_64': MH_CIGAM_64,
492            'FAT_MAGIC': FAT_MAGIC,
493            'FAT_CIGAM': FAT_CIGAM
494        }
495
496        def __init__(self, initial_value=0):
497            dict_utils.Enum.__init__(self, initial_value, self.enum)
498
499        def is_skinny_mach_file(self):
500            return self.value == MH_MAGIC or self.value == MH_CIGAM or self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
501
502        def is_universal_mach_file(self):
503            return self.value == FAT_MAGIC or self.value == FAT_CIGAM
504
505        def unpack(self, data):
506            data.set_byte_order('native')
507            self.value = data.get_uint32()
508
509        def get_byte_order(self):
510            if self.value == MH_CIGAM or self.value == MH_CIGAM_64 or self.value == FAT_CIGAM:
511                return swap_unpack_char()
512            else:
513                return '='
514
515        def is_64_bit(self):
516            return self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64
517
    def __init__(self):
        """Create an empty object; nothing is read until parse(), extract()
        or unpack() is called."""
        self.magic = Mach.Magic()
        # Set by unpack() to a Mach.Skinny or Mach.Universal, or left as
        # None when the magic is not recognized.
        self.content = None
        # Set by parse() or extract().
        self.path = None
522
    def extract(self, path, extractor):
        """Record "path" and unpack mach-o data from an existing "extractor".

        "extractor" is handed straight to unpack(); parse() passes a
        file_extract.FileExtract there, so presumably the same interface is
        expected here.
        """
        self.path = path
        self.unpack(extractor)
526
527    def parse(self, path):
528        self.path = path
529        try:
530            f = open(self.path)
531            file_extractor = file_extract.FileExtract(f, '=')
532            self.unpack(file_extractor)
533            # f.close()
534        except IOError as xxx_todo_changeme:
535            (errno, strerror) = xxx_todo_changeme.args
536            print("I/O error({0}): {1}".format(errno, strerror))
537        except ValueError:
538            print("Could not convert data to an integer.")
539        except:
540            print("Unexpected error:", sys.exc_info()[0])
541            raise
542
    def compare(self, rhs):
        """Compare this file's content with the content of "rhs", another Mach.

        Forwards to self.content.compare(rhs.content); the result of that
        call is not returned. self.content is None until unpack() has
        recognized a file (see is_valid()).
        """
        self.content.compare(rhs.content)
545
    def dump(self, options=None):
        """Forward to self.content.dump(options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump(options)
548
    def dump_header(self, dump_description=True, options=None):
        """Forward to self.content.dump_header(dump_description, options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_header(dump_description, options)
551
    def dump_load_commands(self, dump_description=True, options=None):
        """Forward to self.content.dump_load_commands(dump_description, options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_load_commands(dump_description, options)
554
    def dump_sections(self, dump_description=True, options=None):
        """Forward to self.content.dump_sections(dump_description, options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_sections(dump_description, options)
557
    def dump_section_contents(self, options):
        """Forward to self.content.dump_section_contents(options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_section_contents(options)
560
    def dump_symtab(self, dump_description=True, options=None):
        """Forward to self.content.dump_symtab(dump_description, options).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_symtab(dump_description, options)
563
    def dump_symbol_names_matching_regex(self, regex, file=None):
        """Forward to self.content.dump_symbol_names_matching_regex(regex, file).

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        self.content.dump_symbol_names_matching_regex(regex, file)
566
    def description(self):
        """Return self.content.description().

        self.content is None until unpack() has recognized a file (see
        is_valid()).
        """
        return self.content.description()
569
570    def unpack(self, data):
571        self.magic.unpack(data)
572        if self.magic.is_skinny_mach_file():
573            self.content = Mach.Skinny(self.path)
574        elif self.magic.is_universal_mach_file():
575            self.content = Mach.Universal(self.path)
576        else:
577            self.content = None
578
579        if self.content is not None:
580            self.content.unpack(data, self.magic)
581
    def is_valid(self):
        """Return True if unpack() recognized the file and set self.content."""
        return self.content is not None
584
585    class Universal:
586
587        def __init__(self, path):
588            self.path = path
589            self.type = 'universal'
590            self.file_off = 0
591            self.magic = None
592            self.nfat_arch = 0
593            self.archs = list()
594
595        def description(self):
596            s = '%#8.8x: %s (' % (self.file_off, self.path)
597            archs_string = ''
598            for arch in self.archs:
599                if len(archs_string):
600                    archs_string += ', '
601                archs_string += '%s' % arch.arch
602            s += archs_string
603            s += ')'
604            return s
605
606        def unpack(self, data, magic=None):
607            self.file_off = data.tell()
608            if magic is None:
609                self.magic = Mach.Magic()
610                self.magic.unpack(data)
611            else:
612                self.magic = magic
613                self.file_off = self.file_off - 4
614            # Universal headers are always in big endian
615            data.set_byte_order('big')
616            self.nfat_arch = data.get_uint32()
617            for i in range(self.nfat_arch):
618                self.archs.append(Mach.Universal.ArchInfo())
619                self.archs[i].unpack(data)
620            for i in range(self.nfat_arch):
621                self.archs[i].mach = Mach.Skinny(self.path)
622                data.seek(self.archs[i].offset, 0)
623                skinny_magic = Mach.Magic()
624                skinny_magic.unpack(data)
625                self.archs[i].mach.unpack(data, skinny_magic)
626
627        def compare(self, rhs):
628            print('error: comparing two universal files is not supported yet')
629            return False
630
631        def dump(self, options):
632            if options.dump_header:
633                print()
634                print("Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch))
635                print()
636            if self.nfat_arch > 0:
637                if options.dump_header:
638                    self.archs[0].dump_header(True, options)
639                    for i in range(self.nfat_arch):
640                        self.archs[i].dump_flat(options)
641                if options.dump_header:
642                    print()
643                for i in range(self.nfat_arch):
644                    self.archs[i].mach.dump(options)
645
646        def dump_header(self, dump_description=True, options=None):
647            if dump_description:
648                print(self.description())
649            for i in range(self.nfat_arch):
650                self.archs[i].mach.dump_header(True, options)
651                print()
652
653        def dump_load_commands(self, dump_description=True, options=None):
654            if dump_description:
655                print(self.description())
656            for i in range(self.nfat_arch):
657                self.archs[i].mach.dump_load_commands(True, options)
658                print()
659
660        def dump_sections(self, dump_description=True, options=None):
661            if dump_description:
662                print(self.description())
663            for i in range(self.nfat_arch):
664                self.archs[i].mach.dump_sections(True, options)
665                print()
666
667        def dump_section_contents(self, options):
668            for i in range(self.nfat_arch):
669                self.archs[i].mach.dump_section_contents(options)
670                print()
671
672        def dump_symtab(self, dump_description=True, options=None):
673            if dump_description:
674                print(self.description())
675            for i in range(self.nfat_arch):
676                self.archs[i].mach.dump_symtab(True, options)
677                print()
678
679        def dump_symbol_names_matching_regex(self, regex, file=None):
680            for i in range(self.nfat_arch):
681                self.archs[i].mach.dump_symbol_names_matching_regex(
682                    regex, file)
683
684        class ArchInfo:
685
686            def __init__(self):
687                self.arch = Mach.Arch(0, 0)
688                self.offset = 0
689                self.size = 0
690                self.align = 0
691                self.mach = None
692
693            def unpack(self, data):
694                # Universal headers are always in big endian
695                data.set_byte_order('big')
696                self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = data.get_n_uint32(
697                    5)
698
699            def dump_header(self, dump_description=True, options=None):
700                if options.verbose:
701                    print("CPU        SUBTYPE    OFFSET     SIZE       ALIGN")
702                    print("---------- ---------- ---------- ---------- ----------")
703                else:
704                    print("ARCH       FILEOFFSET FILESIZE   ALIGN")
705                    print("---------- ---------- ---------- ----------")
706
707            def dump_flat(self, options):
708                if options.verbose:
709                    print("%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align))
710                else:
711                    print("%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align))
712
713            def dump(self):
714                print("   cputype: %#8.8x" % self.arch.cpu)
715                print("cpusubtype: %#8.8x" % self.arch.sub)
716                print("    offset: %#8.8x" % self.offset)
717                print("      size: %#8.8x" % self.size)
718                print("     align: %#8.8x" % self.align)
719
720            def __str__(self):
721                return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (
722                    self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
723
724            def __repr__(self):
725                return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (
726                    self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)
727
728    class Flags:
729
730        def __init__(self, b):
731            self.bits = b
732
733        def __str__(self):
734            s = ''
735            if self.bits & MH_NOUNDEFS:
736                s += 'MH_NOUNDEFS | '
737            if self.bits & MH_INCRLINK:
738                s += 'MH_INCRLINK | '
739            if self.bits & MH_DYLDLINK:
740                s += 'MH_DYLDLINK | '
741            if self.bits & MH_BINDATLOAD:
742                s += 'MH_BINDATLOAD | '
743            if self.bits & MH_PREBOUND:
744                s += 'MH_PREBOUND | '
745            if self.bits & MH_SPLIT_SEGS:
746                s += 'MH_SPLIT_SEGS | '
747            if self.bits & MH_LAZY_INIT:
748                s += 'MH_LAZY_INIT | '
749            if self.bits & MH_TWOLEVEL:
750                s += 'MH_TWOLEVEL | '
751            if self.bits & MH_FORCE_FLAT:
752                s += 'MH_FORCE_FLAT | '
753            if self.bits & MH_NOMULTIDEFS:
754                s += 'MH_NOMULTIDEFS | '
755            if self.bits & MH_NOFIXPREBINDING:
756                s += 'MH_NOFIXPREBINDING | '
757            if self.bits & MH_PREBINDABLE:
758                s += 'MH_PREBINDABLE | '
759            if self.bits & MH_ALLMODSBOUND:
760                s += 'MH_ALLMODSBOUND | '
761            if self.bits & MH_SUBSECTIONS_VIA_SYMBOLS:
762                s += 'MH_SUBSECTIONS_VIA_SYMBOLS | '
763            if self.bits & MH_CANONICAL:
764                s += 'MH_CANONICAL | '
765            if self.bits & MH_WEAK_DEFINES:
766                s += 'MH_WEAK_DEFINES | '
767            if self.bits & MH_BINDS_TO_WEAK:
768                s += 'MH_BINDS_TO_WEAK | '
769            if self.bits & MH_ALLOW_STACK_EXECUTION:
770                s += 'MH_ALLOW_STACK_EXECUTION | '
771            if self.bits & MH_ROOT_SAFE:
772                s += 'MH_ROOT_SAFE | '
773            if self.bits & MH_SETUID_SAFE:
774                s += 'MH_SETUID_SAFE | '
775            if self.bits & MH_NO_REEXPORTED_DYLIBS:
776                s += 'MH_NO_REEXPORTED_DYLIBS | '
777            if self.bits & MH_PIE:
778                s += 'MH_PIE | '
779            if self.bits & MH_DEAD_STRIPPABLE_DYLIB:
780                s += 'MH_DEAD_STRIPPABLE_DYLIB | '
781            if self.bits & MH_HAS_TLV_DESCRIPTORS:
782                s += 'MH_HAS_TLV_DESCRIPTORS | '
783            if self.bits & MH_NO_HEAP_EXECUTION:
784                s += 'MH_NO_HEAP_EXECUTION | '
785            # Strip the trailing " |" if we have any flags
786            if len(s) > 0:
787                s = s[0:-2]
788            return s
789
790    class FileType(dict_utils.Enum):
791
792        enum = {
793            'MH_OBJECT': MH_OBJECT,
794            'MH_EXECUTE': MH_EXECUTE,
795            'MH_FVMLIB': MH_FVMLIB,
796            'MH_CORE': MH_CORE,
797            'MH_PRELOAD': MH_PRELOAD,
798            'MH_DYLIB': MH_DYLIB,
799            'MH_DYLINKER': MH_DYLINKER,
800            'MH_BUNDLE': MH_BUNDLE,
801            'MH_DYLIB_STUB': MH_DYLIB_STUB,
802            'MH_DSYM': MH_DSYM,
803            'MH_KEXT_BUNDLE': MH_KEXT_BUNDLE
804        }
805
806        def __init__(self, initial_value=0):
807            dict_utils.Enum.__init__(self, initial_value, self.enum)
808
809    class Skinny:
810
811        def __init__(self, path):
812            self.path = path
813            self.type = 'skinny'
814            self.data = None
815            self.file_off = 0
816            self.magic = 0
817            self.arch = Mach.Arch(0, 0)
818            self.filetype = Mach.FileType(0)
819            self.ncmds = 0
820            self.sizeofcmds = 0
821            self.flags = Mach.Flags(0)
822            self.uuid = None
823            self.commands = list()
824            self.segments = list()
825            self.sections = list()
826            self.symbols = list()
827            self.sections.append(Mach.Section())
828
        def description(self):
            """Return "<file offset>: <path> (<arch>)" for this file."""
            return '%#8.8x: %s (%s)' % (self.file_off, self.path, self.arch)
831
832        def unpack(self, data, magic=None):
833            self.data = data
834            self.file_off = data.tell()
835            if magic is None:
836                self.magic = Mach.Magic()
837                self.magic.unpack(data)
838            else:
839                self.magic = magic
840                self.file_off = self.file_off - 4
841            data.set_byte_order(self.magic.get_byte_order())
842            self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, bits = data.get_n_uint32(
843                6)
844            self.flags.bits = bits
845
846            if self.is_64_bit():
847                data.get_uint32()  # Skip reserved word in mach_header_64
848
849            for i in range(0, self.ncmds):
850                lc = self.unpack_load_command(data)
851                self.commands.append(lc)
852
853        def get_data(self):
854            if self.data:
855                self.data.set_byte_order(self.magic.get_byte_order())
856                return self.data
857            return None
858
859        def unpack_load_command(self, data):
860            lc = Mach.LoadCommand()
861            lc.unpack(self, data)
862            lc_command = lc.command.get_enum_value()
863            if (lc_command == LC_SEGMENT or
864                    lc_command == LC_SEGMENT_64):
865                lc = Mach.SegmentLoadCommand(lc)
866                lc.unpack(self, data)
867            elif (lc_command == LC_LOAD_DYLIB or
868                  lc_command == LC_ID_DYLIB or
869                  lc_command == LC_LOAD_WEAK_DYLIB or
870                  lc_command == LC_REEXPORT_DYLIB):
871                lc = Mach.DylibLoadCommand(lc)
872                lc.unpack(self, data)
873            elif (lc_command == LC_LOAD_DYLINKER or
874                  lc_command == LC_SUB_FRAMEWORK or
875                  lc_command == LC_SUB_CLIENT or
876                  lc_command == LC_SUB_UMBRELLA or
877                  lc_command == LC_SUB_LIBRARY or
878                  lc_command == LC_ID_DYLINKER or
879                  lc_command == LC_RPATH):
880                lc = Mach.LoadDYLDLoadCommand(lc)
881                lc.unpack(self, data)
882            elif (lc_command == LC_DYLD_INFO_ONLY):
883                lc = Mach.DYLDInfoOnlyLoadCommand(lc)
884                lc.unpack(self, data)
885            elif (lc_command == LC_SYMTAB):
886                lc = Mach.SymtabLoadCommand(lc)
887                lc.unpack(self, data)
888            elif (lc_command == LC_DYSYMTAB):
889                lc = Mach.DYLDSymtabLoadCommand(lc)
890                lc.unpack(self, data)
891            elif (lc_command == LC_UUID):
892                lc = Mach.UUIDLoadCommand(lc)
893                lc.unpack(self, data)
894            elif (lc_command == LC_CODE_SIGNATURE or
895                  lc_command == LC_SEGMENT_SPLIT_INFO or
896                  lc_command == LC_FUNCTION_STARTS):
897                lc = Mach.DataBlobLoadCommand(lc)
898                lc.unpack(self, data)
899            elif (lc_command == LC_UNIXTHREAD):
900                lc = Mach.UnixThreadLoadCommand(lc)
901                lc.unpack(self, data)
902            elif (lc_command == LC_ENCRYPTION_INFO):
903                lc = Mach.EncryptionInfoLoadCommand(lc)
904                lc.unpack(self, data)
905            lc.skip(data)
906            return lc
907
908        def compare(self, rhs):
909            print("\nComparing:")
910            print("a) %s %s" % (self.arch, self.path))
911            print("b) %s %s" % (rhs.arch, rhs.path))
912            result = True
913            if self.type == rhs.type:
914                for lhs_section in self.sections[1:]:
915                    rhs_section = rhs.get_section_by_section(lhs_section)
916                    if rhs_section:
917                        print('comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname), end=' ')
918                        sys.stdout.flush()
919                        lhs_data = lhs_section.get_contents(self)
920                        rhs_data = rhs_section.get_contents(rhs)
921                        if lhs_data and rhs_data:
922                            if lhs_data == rhs_data:
923                                print('ok')
924                            else:
925                                lhs_data_len = len(lhs_data)
926                                rhs_data_len = len(rhs_data)
927                                # if lhs_data_len < rhs_data_len:
928                                #     if lhs_data == rhs_data[0:lhs_data_len]:
929                                #         print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
930                                #     else:
931                                #         # TODO: check padding
932                                #         result = False
933                                # elif lhs_data_len > rhs_data_len:
934                                #     if lhs_data[0:rhs_data_len] == rhs_data:
935                                #         print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len)
936                                #     else:
937                                #         # TODO: check padding
938                                #         result = False
939                                # else:
940                                result = False
941                                print('error: sections differ')
942                                # print 'a) %s' % (lhs_section)
943                                # dump_hex_byte_string_diff(0, lhs_data, rhs_data)
944                                # print 'b) %s' % (rhs_section)
945                                # dump_hex_byte_string_diff(0, rhs_data, lhs_data)
946                        elif lhs_data and not rhs_data:
947                            print('error: section data missing from b:')
948                            print('a) %s' % (lhs_section))
949                            print('b) %s' % (rhs_section))
950                            result = False
951                        elif not lhs_data and rhs_data:
952                            print('error: section data missing from a:')
953                            print('a) %s' % (lhs_section))
954                            print('b) %s' % (rhs_section))
955                            result = False
956                        elif lhs_section.offset or rhs_section.offset:
957                            print('error: section data missing for both a and b:')
958                            print('a) %s' % (lhs_section))
959                            print('b) %s' % (rhs_section))
960                            result = False
961                        else:
962                            print('ok')
963                    else:
964                        result = False
965                        print('error: section %s is missing in %s' % (lhs_section.sectname, rhs.path))
966            else:
967                print('error: comparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type))
968                result = False
969            if not result:
970                print('error: mach files differ')
971            return result
972
973        def dump_header(self, dump_description=True, options=None):
974            if options.verbose:
975                print("MAGIC      CPU        SUBTYPE    FILETYPE   NUM CMDS SIZE CMDS  FLAGS")
976                print("---------- ---------- ---------- ---------- -------- ---------- ----------")
977            else:
978                print("MAGIC        ARCH       FILETYPE       NUM CMDS SIZE CMDS  FLAGS")
979                print("------------ ---------- -------------- -------- ---------- ----------")
980
981        def dump_flat(self, options):
982            if options.verbose:
983                print("%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic, self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits))
984            else:
985                print("%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags))
986
987        def dump(self, options):
988            if options.dump_header:
989                self.dump_header(True, options)
990            if options.dump_load_commands:
991                self.dump_load_commands(False, options)
992            if options.dump_sections:
993                self.dump_sections(False, options)
994            if options.section_names:
995                self.dump_section_contents(options)
996            if options.dump_symtab:
997                self.get_symtab()
998                if len(self.symbols):
999                    self.dump_sections(False, options)
1000                else:
1001                    print("No symbols")
1002            if options.find_mangled:
1003                self.dump_symbol_names_matching_regex(re.compile('^_?_Z'))
1004
1005        def dump_header(self, dump_description=True, options=None):
1006            if dump_description:
1007                print(self.description())
1008            print("Mach Header")
1009            print("       magic: %#8.8x %s" % (self.magic.value, self.magic))
1010            print("     cputype: %#8.8x %s" % (self.arch.cpu, self.arch))
1011            print("  cpusubtype: %#8.8x" % self.arch.sub)
1012            print("    filetype: %#8.8x %s" % (self.filetype.get_enum_value(), self.filetype.get_enum_name()))
1013            print("       ncmds: %#8.8x %u" % (self.ncmds, self.ncmds))
1014            print("  sizeofcmds: %#8.8x" % self.sizeofcmds)
1015            print("       flags: %#8.8x %s" % (self.flags.bits, self.flags))
1016
1017        def dump_load_commands(self, dump_description=True, options=None):
1018            if dump_description:
1019                print(self.description())
1020            for lc in self.commands:
1021                print(lc)
1022
1023        def get_section_by_name(self, name):
1024            for section in self.sections:
1025                if section.sectname and section.sectname == name:
1026                    return section
1027            return None
1028
1029        def get_section_by_section(self, other_section):
1030            for section in self.sections:
1031                if section.sectname == other_section.sectname and section.segname == other_section.segname:
1032                    return section
1033            return None
1034
1035        def dump_sections(self, dump_description=True, options=None):
1036            if dump_description:
1037                print(self.description())
1038            num_sections = len(self.sections)
1039            if num_sections > 1:
1040                self.sections[1].dump_header()
1041                for sect_idx in range(1, num_sections):
1042                    print("%s" % self.sections[sect_idx])
1043
1044        def dump_section_contents(self, options):
1045            saved_section_to_disk = False
1046            for sectname in options.section_names:
1047                section = self.get_section_by_name(sectname)
1048                if section:
1049                    sect_bytes = section.get_contents(self)
1050                    if options.outfile:
1051                        if not saved_section_to_disk:
1052                            outfile = open(options.outfile, 'w')
1053                            if options.extract_modules:
1054                                # print "Extracting modules from mach file..."
1055                                data = file_extract.FileExtract(
1056                                    io.BytesIO(sect_bytes), self.data.byte_order)
1057                                version = data.get_uint32()
1058                                num_modules = data.get_uint32()
1059                                # print "version = %u, num_modules = %u" %
1060                                # (version, num_modules)
1061                                for i in range(num_modules):
1062                                    data_offset = data.get_uint64()
1063                                    data_size = data.get_uint64()
1064                                    name_offset = data.get_uint32()
1065                                    language = data.get_uint32()
1066                                    flags = data.get_uint32()
1067                                    data.seek(name_offset)
1068                                    module_name = data.get_c_string()
1069                                    # print "module[%u] data_offset = %#16.16x,
1070                                    # data_size = %#16.16x, name_offset =
1071                                    # %#16.16x (%s), language = %u, flags =
1072                                    # %#x" % (i, data_offset, data_size,
1073                                    # name_offset, module_name, language,
1074                                    # flags)
1075                                    data.seek(data_offset)
1076                                    outfile.write(data.read_size(data_size))
1077                            else:
1078                                print("Saving section %s to '%s'" % (sectname, options.outfile))
1079                                outfile.write(sect_bytes)
1080                            outfile.close()
1081                            saved_section_to_disk = True
1082                        else:
1083                            print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname))
1084                    else:
1085                        print('section %s:\n' % (sectname))
1086                        section.dump_header()
1087                        print('%s\n' % (section))
1088                        dump_memory(0, sect_bytes, options.max_count, 16)
1089                else:
1090                    print('error: no section named "%s" was found' % (sectname))
1091
1092        def get_segment(self, segname):
1093            if len(self.segments) == 1 and self.segments[0].segname == '':
1094                return self.segments[0]
1095            for segment in self.segments:
1096                if segment.segname == segname:
1097                    return segment
1098            return None
1099
1100        def get_first_load_command(self, lc_enum_value):
1101            for lc in self.commands:
1102                if lc.command.value == lc_enum_value:
1103                    return lc
1104            return None
1105
1106        def get_symtab(self):
1107            if self.data and not self.symbols:
1108                lc_symtab = self.get_first_load_command(LC_SYMTAB)
1109                if lc_symtab:
1110                    symtab_offset = self.file_off
1111                    if self.data.is_in_memory():
1112                        linkedit_segment = self.get_segment('__LINKEDIT')
1113                        if linkedit_segment:
1114                            linkedit_vmaddr = linkedit_segment.vmaddr
1115                            linkedit_fileoff = linkedit_segment.fileoff
1116                            symtab_offset = linkedit_vmaddr + lc_symtab.symoff - linkedit_fileoff
1117                            symtab_offset = linkedit_vmaddr + lc_symtab.stroff - linkedit_fileoff
1118                    else:
1119                        symtab_offset += lc_symtab.symoff
1120
1121                    self.data.seek(symtab_offset)
1122                    is_64 = self.is_64_bit()
1123                    for i in range(lc_symtab.nsyms):
1124                        nlist = Mach.NList()
1125                        nlist.unpack(self, self.data, lc_symtab)
1126                        self.symbols.append(nlist)
1127                else:
1128                    print("no LC_SYMTAB")
1129
1130        def dump_symtab(self, dump_description=True, options=None):
1131            self.get_symtab()
1132            if dump_description:
1133                print(self.description())
1134            for i, symbol in enumerate(self.symbols):
1135                print('[%5u] %s' % (i, symbol))
1136
1137        def dump_symbol_names_matching_regex(self, regex, file=None):
1138            self.get_symtab()
1139            for symbol in self.symbols:
1140                if symbol.name and regex.search(symbol.name):
1141                    print(symbol.name)
1142                    if file:
1143                        file.write('%s\n' % (symbol.name))
1144
1145        def is_64_bit(self):
1146            return self.magic.is_64_bit()
1147
1148    class LoadCommand:
1149
1150        class Command(dict_utils.Enum):
1151            enum = {
1152                'LC_SEGMENT': LC_SEGMENT,
1153                'LC_SYMTAB': LC_SYMTAB,
1154                'LC_SYMSEG': LC_SYMSEG,
1155                'LC_THREAD': LC_THREAD,
1156                'LC_UNIXTHREAD': LC_UNIXTHREAD,
1157                'LC_LOADFVMLIB': LC_LOADFVMLIB,
1158                'LC_IDFVMLIB': LC_IDFVMLIB,
1159                'LC_IDENT': LC_IDENT,
1160                'LC_FVMFILE': LC_FVMFILE,
1161                'LC_PREPAGE': LC_PREPAGE,
1162                'LC_DYSYMTAB': LC_DYSYMTAB,
1163                'LC_LOAD_DYLIB': LC_LOAD_DYLIB,
1164                'LC_ID_DYLIB': LC_ID_DYLIB,
1165                'LC_LOAD_DYLINKER': LC_LOAD_DYLINKER,
1166                'LC_ID_DYLINKER': LC_ID_DYLINKER,
1167                'LC_PREBOUND_DYLIB': LC_PREBOUND_DYLIB,
1168                'LC_ROUTINES': LC_ROUTINES,
1169                'LC_SUB_FRAMEWORK': LC_SUB_FRAMEWORK,
1170                'LC_SUB_UMBRELLA': LC_SUB_UMBRELLA,
1171                'LC_SUB_CLIENT': LC_SUB_CLIENT,
1172                'LC_SUB_LIBRARY': LC_SUB_LIBRARY,
1173                'LC_TWOLEVEL_HINTS': LC_TWOLEVEL_HINTS,
1174                'LC_PREBIND_CKSUM': LC_PREBIND_CKSUM,
1175                'LC_LOAD_WEAK_DYLIB': LC_LOAD_WEAK_DYLIB,
1176                'LC_SEGMENT_64': LC_SEGMENT_64,
1177                'LC_ROUTINES_64': LC_ROUTINES_64,
1178                'LC_UUID': LC_UUID,
1179                'LC_RPATH': LC_RPATH,
1180                'LC_CODE_SIGNATURE': LC_CODE_SIGNATURE,
1181                'LC_SEGMENT_SPLIT_INFO': LC_SEGMENT_SPLIT_INFO,
1182                'LC_REEXPORT_DYLIB': LC_REEXPORT_DYLIB,
1183                'LC_LAZY_LOAD_DYLIB': LC_LAZY_LOAD_DYLIB,
1184                'LC_ENCRYPTION_INFO': LC_ENCRYPTION_INFO,
1185                'LC_DYLD_INFO': LC_DYLD_INFO,
1186                'LC_DYLD_INFO_ONLY': LC_DYLD_INFO_ONLY,
1187                'LC_LOAD_UPWARD_DYLIB': LC_LOAD_UPWARD_DYLIB,
1188                'LC_VERSION_MIN_MACOSX': LC_VERSION_MIN_MACOSX,
1189                'LC_VERSION_MIN_IPHONEOS': LC_VERSION_MIN_IPHONEOS,
1190                'LC_FUNCTION_STARTS': LC_FUNCTION_STARTS,
1191                'LC_DYLD_ENVIRONMENT': LC_DYLD_ENVIRONMENT
1192            }
1193
1194            def __init__(self, initial_value=0):
1195                dict_utils.Enum.__init__(self, initial_value, self.enum)
1196
1197        def __init__(self, c=None, l=0, o=0):
1198            if c is not None:
1199                self.command = c
1200            else:
1201                self.command = Mach.LoadCommand.Command(0)
1202            self.length = l
1203            self.file_off = o
1204
1205        def unpack(self, mach_file, data):
1206            self.file_off = data.tell()
1207            self.command.value, self.length = data.get_n_uint32(2)
1208
1209        def skip(self, data):
1210            data.seek(self.file_off + self.length, 0)
1211
1212        def __str__(self):
1213            lc_name = self.command.get_enum_name()
1214            return '%#8.8x: <%#4.4x> %-24s' % (self.file_off,
1215                                               self.length, lc_name)
1216
1217    class Section:
1218
1219        def __init__(self):
1220            self.index = 0
1221            self.is_64 = False
1222            self.sectname = None
1223            self.segname = None
1224            self.addr = 0
1225            self.size = 0
1226            self.offset = 0
1227            self.align = 0
1228            self.reloff = 0
1229            self.nreloc = 0
1230            self.flags = 0
1231            self.reserved1 = 0
1232            self.reserved2 = 0
1233            self.reserved3 = 0
1234
1235        def unpack(self, is_64, data):
1236            self.is_64 = is_64
1237            self.sectname = data.get_fixed_length_c_string(16, '', True)
1238            self.segname = data.get_fixed_length_c_string(16, '', True)
1239            if self.is_64:
1240                self.addr, self.size = data.get_n_uint64(2)
1241                self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3 = data.get_n_uint32(
1242                    8)
1243            else:
1244                self.addr, self.size = data.get_n_uint32(2)
1245                self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2 = data.get_n_uint32(
1246                    7)
1247
1248        def dump_header(self):
1249            if self.is_64:
1250                print("INDEX ADDRESS            SIZE               OFFSET     ALIGN      RELOFF     NRELOC     FLAGS      RESERVED1  RESERVED2  RESERVED3  NAME")
1251                print("===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------")
1252            else:
1253                print("INDEX ADDRESS    SIZE       OFFSET     ALIGN      RELOFF     NRELOC     FLAGS      RESERVED1  RESERVED2  NAME")
1254                print("===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------")
1255
1256        def __str__(self):
1257            if self.is_64:
1258                return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
1259                    self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3, self.segname, self.sectname)
1260            else:
1261                return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % (
1262                    self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.segname, self.sectname)
1263
1264        def get_contents(self, mach_file):
1265            '''Get the section contents as a python string'''
1266            if self.size > 0 and mach_file.get_segment(
1267                    self.segname).filesize > 0:
1268                data = mach_file.get_data()
1269                if data:
1270                    section_data_offset = mach_file.file_off + self.offset
1271                    # print '%s.%s is at offset 0x%x with size 0x%x' %
1272                    # (self.segname, self.sectname, section_data_offset,
1273                    # self.size)
1274                    data.push_offset_and_seek(section_data_offset)
1275                    bytes = data.read_size(self.size)
1276                    data.pop_offset_and_seek()
1277                    return bytes
1278            return None
1279
1280    class DylibLoadCommand(LoadCommand):
1281
1282        def __init__(self, lc):
1283            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1284            self.name = None
1285            self.timestamp = 0
1286            self.current_version = 0
1287            self.compatibility_version = 0
1288
1289        def unpack(self, mach_file, data):
1290            byte_order_char = mach_file.magic.get_byte_order()
1291            name_offset, self.timestamp, self.current_version, self.compatibility_version = data.get_n_uint32(
1292                4)
1293            data.seek(self.file_off + name_offset, 0)
1294            self.name = data.get_fixed_length_c_string(self.length - 24)
1295
1296        def __str__(self):
1297            s = Mach.LoadCommand.__str__(self)
1298            s += "%#8.8x %#8.8x %#8.8x " % (self.timestamp,
1299                                            self.current_version,
1300                                            self.compatibility_version)
1301            s += self.name
1302            return s
1303
1304    class LoadDYLDLoadCommand(LoadCommand):
1305
1306        def __init__(self, lc):
1307            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1308            self.name = None
1309
1310        def unpack(self, mach_file, data):
1311            data.get_uint32()
1312            self.name = data.get_fixed_length_c_string(self.length - 12)
1313
1314        def __str__(self):
1315            s = Mach.LoadCommand.__str__(self)
1316            s += "%s" % self.name
1317            return s
1318
1319    class UnixThreadLoadCommand(LoadCommand):
1320
1321        class ThreadState:
1322
1323            def __init__(self):
1324                self.flavor = 0
1325                self.count = 0
1326                self.register_values = list()
1327
1328            def unpack(self, data):
1329                self.flavor, self.count = data.get_n_uint32(2)
1330                self.register_values = data.get_n_uint32(self.count)
1331
1332            def __str__(self):
1333                s = "flavor = %u, count = %u, regs =" % (
1334                    self.flavor, self.count)
1335                i = 0
1336                for register_value in self.register_values:
1337                    if i % 8 == 0:
1338                        s += "\n                                            "
1339                    s += " %#8.8x" % register_value
1340                    i += 1
1341                return s
1342
1343        def __init__(self, lc):
1344            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1345            self.reg_sets = list()
1346
1347        def unpack(self, mach_file, data):
1348            reg_set = Mach.UnixThreadLoadCommand.ThreadState()
1349            reg_set.unpack(data)
1350            self.reg_sets.append(reg_set)
1351
1352        def __str__(self):
1353            s = Mach.LoadCommand.__str__(self)
1354            for reg_set in self.reg_sets:
1355                s += "%s" % reg_set
1356            return s
1357
1358    class DYLDInfoOnlyLoadCommand(LoadCommand):
1359
1360        def __init__(self, lc):
1361            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1362            self.rebase_off = 0
1363            self.rebase_size = 0
1364            self.bind_off = 0
1365            self.bind_size = 0
1366            self.weak_bind_off = 0
1367            self.weak_bind_size = 0
1368            self.lazy_bind_off = 0
1369            self.lazy_bind_size = 0
1370            self.export_off = 0
1371            self.export_size = 0
1372
1373        def unpack(self, mach_file, data):
1374            byte_order_char = mach_file.magic.get_byte_order()
1375            self.rebase_off, self.rebase_size, self.bind_off, self.bind_size, self.weak_bind_off, self.weak_bind_size, self.lazy_bind_off, self.lazy_bind_size, self.export_off, self.export_size = data.get_n_uint32(
1376                10)
1377
1378        def __str__(self):
1379            s = Mach.LoadCommand.__str__(self)
1380            s += "rebase_off = %#8.8x, rebase_size = %u, " % (
1381                self.rebase_off, self.rebase_size)
1382            s += "bind_off = %#8.8x, bind_size = %u, " % (
1383                self.bind_off, self.bind_size)
1384            s += "weak_bind_off = %#8.8x, weak_bind_size = %u, " % (
1385                self.weak_bind_off, self.weak_bind_size)
1386            s += "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % (
1387                self.lazy_bind_off, self.lazy_bind_size)
1388            s += "export_off = %#8.8x, export_size = %u, " % (
1389                self.export_off, self.export_size)
1390            return s
1391
1392    class DYLDSymtabLoadCommand(LoadCommand):
1393
1394        def __init__(self, lc):
1395            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1396            self.ilocalsym = 0
1397            self.nlocalsym = 0
1398            self.iextdefsym = 0
1399            self.nextdefsym = 0
1400            self.iundefsym = 0
1401            self.nundefsym = 0
1402            self.tocoff = 0
1403            self.ntoc = 0
1404            self.modtaboff = 0
1405            self.nmodtab = 0
1406            self.extrefsymoff = 0
1407            self.nextrefsyms = 0
1408            self.indirectsymoff = 0
1409            self.nindirectsyms = 0
1410            self.extreloff = 0
1411            self.nextrel = 0
1412            self.locreloff = 0
1413            self.nlocrel = 0
1414
1415        def unpack(self, mach_file, data):
1416            byte_order_char = mach_file.magic.get_byte_order()
1417            self.ilocalsym, self.nlocalsym, self.iextdefsym, self.nextdefsym, self.iundefsym, self.nundefsym, self.tocoff, self.ntoc, self.modtaboff, self.nmodtab, self.extrefsymoff, self.nextrefsyms, self.indirectsymoff, self.nindirectsyms, self.extreloff, self.nextrel, self.locreloff, self.nlocrel = data.get_n_uint32(
1418                18)
1419
1420        def __str__(self):
1421            s = Mach.LoadCommand.__str__(self)
1422            # s += "ilocalsym = %u, nlocalsym = %u, " % (self.ilocalsym, self.nlocalsym)
1423            # s += "iextdefsym = %u, nextdefsym = %u, " % (self.iextdefsym, self.nextdefsym)
1424            # s += "iundefsym %u, nundefsym = %u, " % (self.iundefsym, self.nundefsym)
1425            # s += "tocoff = %#8.8x, ntoc = %u, " % (self.tocoff, self.ntoc)
1426            # s += "modtaboff = %#8.8x, nmodtab = %u, " % (self.modtaboff, self.nmodtab)
1427            # s += "extrefsymoff = %#8.8x, nextrefsyms = %u, " % (self.extrefsymoff, self.nextrefsyms)
1428            # s += "indirectsymoff = %#8.8x, nindirectsyms = %u, " % (self.indirectsymoff, self.nindirectsyms)
1429            # s += "extreloff = %#8.8x, nextrel = %u, " % (self.extreloff, self.nextrel)
1430            # s += "locreloff = %#8.8x, nlocrel = %u" % (self.locreloff,
1431            # self.nlocrel)
1432            s += "ilocalsym      = %-10u, nlocalsym     = %u\n" % (
1433                self.ilocalsym, self.nlocalsym)
1434            s += "                                             iextdefsym     = %-10u, nextdefsym    = %u\n" % (
1435                self.iextdefsym, self.nextdefsym)
1436            s += "                                             iundefsym      = %-10u, nundefsym     = %u\n" % (
1437                self.iundefsym, self.nundefsym)
1438            s += "                                             tocoff         = %#8.8x, ntoc          = %u\n" % (
1439                self.tocoff, self.ntoc)
1440            s += "                                             modtaboff      = %#8.8x, nmodtab       = %u\n" % (
1441                self.modtaboff, self.nmodtab)
1442            s += "                                             extrefsymoff   = %#8.8x, nextrefsyms   = %u\n" % (
1443                self.extrefsymoff, self.nextrefsyms)
1444            s += "                                             indirectsymoff = %#8.8x, nindirectsyms = %u\n" % (
1445                self.indirectsymoff, self.nindirectsyms)
1446            s += "                                             extreloff      = %#8.8x, nextrel       = %u\n" % (
1447                self.extreloff, self.nextrel)
1448            s += "                                             locreloff      = %#8.8x, nlocrel       = %u" % (
1449                self.locreloff, self.nlocrel)
1450            return s
1451
1452    class SymtabLoadCommand(LoadCommand):
1453
1454        def __init__(self, lc):
1455            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1456            self.symoff = 0
1457            self.nsyms = 0
1458            self.stroff = 0
1459            self.strsize = 0
1460
1461        def unpack(self, mach_file, data):
1462            byte_order_char = mach_file.magic.get_byte_order()
1463            self.symoff, self.nsyms, self.stroff, self.strsize = data.get_n_uint32(
1464                4)
1465
1466        def __str__(self):
1467            s = Mach.LoadCommand.__str__(self)
1468            s += "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % (
1469                self.symoff, self.nsyms, self.stroff, self.strsize)
1470            return s
1471
1472    class UUIDLoadCommand(LoadCommand):
1473
1474        def __init__(self, lc):
1475            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1476            self.uuid = None
1477
1478        def unpack(self, mach_file, data):
1479            uuid_data = data.get_n_uint8(16)
1480            uuid_str = ''
1481            for byte in uuid_data:
1482                uuid_str += '%2.2x' % byte
1483            self.uuid = uuid.UUID(uuid_str)
1484            mach_file.uuid = self.uuid
1485
1486        def __str__(self):
1487            s = Mach.LoadCommand.__str__(self)
1488            s += self.uuid.__str__()
1489            return s
1490
1491    class DataBlobLoadCommand(LoadCommand):
1492
1493        def __init__(self, lc):
1494            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1495            self.dataoff = 0
1496            self.datasize = 0
1497
1498        def unpack(self, mach_file, data):
1499            byte_order_char = mach_file.magic.get_byte_order()
1500            self.dataoff, self.datasize = data.get_n_uint32(2)
1501
1502        def __str__(self):
1503            s = Mach.LoadCommand.__str__(self)
1504            s += "dataoff = %#8.8x, datasize = %u" % (
1505                self.dataoff, self.datasize)
1506            return s
1507
1508    class EncryptionInfoLoadCommand(LoadCommand):
1509
1510        def __init__(self, lc):
1511            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1512            self.cryptoff = 0
1513            self.cryptsize = 0
1514            self.cryptid = 0
1515
1516        def unpack(self, mach_file, data):
1517            byte_order_char = mach_file.magic.get_byte_order()
1518            self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3)
1519
1520        def __str__(self):
1521            s = Mach.LoadCommand.__str__(self)
1522            s += "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % (
1523                self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid)
1524            return s
1525
1526    class SegmentLoadCommand(LoadCommand):
1527
1528        def __init__(self, lc):
1529            Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off)
1530            self.segname = None
1531            self.vmaddr = 0
1532            self.vmsize = 0
1533            self.fileoff = 0
1534            self.filesize = 0
1535            self.maxprot = 0
1536            self.initprot = 0
1537            self.nsects = 0
1538            self.flags = 0
1539
1540        def unpack(self, mach_file, data):
1541            is_64 = self.command.get_enum_value() == LC_SEGMENT_64
1542            self.segname = data.get_fixed_length_c_string(16, '', True)
1543            if is_64:
1544                self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint64(
1545                    4)
1546            else:
1547                self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint32(
1548                    4)
1549            self.maxprot, self.initprot, self.nsects, self.flags = data.get_n_uint32(
1550                4)
1551            mach_file.segments.append(self)
1552            for i in range(self.nsects):
1553                section = Mach.Section()
1554                section.unpack(is_64, data)
1555                section.index = len(mach_file.sections)
1556                mach_file.sections.append(section)
1557
1558        def __str__(self):
1559            s = Mach.LoadCommand.__str__(self)
1560            if self.command.get_enum_value() == LC_SEGMENT:
1561                s += "%#8.8x %#8.8x %#8.8x %#8.8x " % (
1562                    self.vmaddr, self.vmsize, self.fileoff, self.filesize)
1563            else:
1564                s += "%#16.16x %#16.16x %#16.16x %#16.16x " % (
1565                    self.vmaddr, self.vmsize, self.fileoff, self.filesize)
1566            s += "%s %s %3u %#8.8x" % (vm_prot_names[self.maxprot], vm_prot_names[
1567                                       self.initprot], self.nsects, self.flags)
1568            s += ' ' + self.segname
1569            return s
1570
1571    class NList:
1572
1573        class Type:
1574
1575            class Stab(dict_utils.Enum):
1576                enum = {
1577                    'N_GSYM': N_GSYM,
1578                    'N_FNAME': N_FNAME,
1579                    'N_FUN': N_FUN,
1580                    'N_STSYM': N_STSYM,
1581                    'N_LCSYM': N_LCSYM,
1582                    'N_BNSYM': N_BNSYM,
1583                    'N_OPT': N_OPT,
1584                    'N_RSYM': N_RSYM,
1585                    'N_SLINE': N_SLINE,
1586                    'N_ENSYM': N_ENSYM,
1587                    'N_SSYM': N_SSYM,
1588                    'N_SO': N_SO,
1589                    'N_OSO': N_OSO,
1590                    'N_LSYM': N_LSYM,
1591                    'N_BINCL': N_BINCL,
1592                    'N_SOL': N_SOL,
1593                    'N_PARAMS': N_PARAMS,
1594                    'N_VERSION': N_VERSION,
1595                    'N_OLEVEL': N_OLEVEL,
1596                    'N_PSYM': N_PSYM,
1597                    'N_EINCL': N_EINCL,
1598                    'N_ENTRY': N_ENTRY,
1599                    'N_LBRAC': N_LBRAC,
1600                    'N_EXCL': N_EXCL,
1601                    'N_RBRAC': N_RBRAC,
1602                    'N_BCOMM': N_BCOMM,
1603                    'N_ECOMM': N_ECOMM,
1604                    'N_ECOML': N_ECOML,
1605                    'N_LENG': N_LENG
1606                }
1607
1608                def __init__(self, magic=0):
1609                    dict_utils.Enum.__init__(self, magic, self.enum)
1610
1611            def __init__(self, t=0):
1612                self.value = t
1613
1614            def __str__(self):
1615                n_type = self.value
1616                if n_type & N_STAB:
1617                    stab = Mach.NList.Type.Stab(self.value)
1618                    return '%s' % stab
1619                else:
1620                    type = self.value & N_TYPE
1621                    type_str = ''
1622                    if type == N_UNDF:
1623                        type_str = 'N_UNDF'
1624                    elif type == N_ABS:
1625                        type_str = 'N_ABS '
1626                    elif type == N_SECT:
1627                        type_str = 'N_SECT'
1628                    elif type == N_PBUD:
1629                        type_str = 'N_PBUD'
1630                    elif type == N_INDR:
1631                        type_str = 'N_INDR'
1632                    else:
1633                        type_str = "??? (%#2.2x)" % type
1634                    if n_type & N_PEXT:
1635                        type_str += ' | PEXT'
1636                    if n_type & N_EXT:
1637                        type_str += ' | EXT '
1638                    return type_str
1639
1640        def __init__(self):
1641            self.index = 0
1642            self.name_offset = 0
1643            self.name = 0
1644            self.type = Mach.NList.Type()
1645            self.sect_idx = 0
1646            self.desc = 0
1647            self.value = 0
1648
1649        def unpack(self, mach_file, data, symtab_lc):
1650            self.index = len(mach_file.symbols)
1651            self.name_offset = data.get_uint32()
1652            self.type.value, self.sect_idx = data.get_n_uint8(2)
1653            self.desc = data.get_uint16()
1654            if mach_file.is_64_bit():
1655                self.value = data.get_uint64()
1656            else:
1657                self.value = data.get_uint32()
1658            data.push_offset_and_seek(
1659                mach_file.file_off +
1660                symtab_lc.stroff +
1661                self.name_offset)
1662            # print "get string for symbol[%u]" % self.index
1663            self.name = data.get_c_string()
1664            data.pop_offset_and_seek()
1665
1666        def __str__(self):
1667            name_display = ''
1668            if len(self.name):
1669                name_display = ' "%s"' % self.name
1670            return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % (self.name_offset,
1671                                                                      self.type.value, self.type, self.sect_idx, self.desc, self.value, name_display)
1672
1673    class Interactive(cmd.Cmd):
1674        '''Interactive command interpreter to mach-o files.'''
1675
1676        def __init__(self, mach, options):
1677            cmd.Cmd.__init__(self)
1678            self.intro = 'Interactive mach-o command interpreter'
1679            self.prompt = 'mach-o: %s %% ' % mach.path
1680            self.mach = mach
1681            self.options = options
1682
1683        def default(self, line):
1684            '''Catch all for unknown command, which will exit the interpreter.'''
1685            print("uknown command: %s" % line)
1686            return True
1687
1688        def do_q(self, line):
1689            '''Quit command'''
1690            return True
1691
1692        def do_quit(self, line):
1693            '''Quit command'''
1694            return True
1695
1696        def do_header(self, line):
1697            '''Dump mach-o file headers'''
1698            self.mach.dump_header(True, self.options)
1699            return False
1700
1701        def do_load(self, line):
1702            '''Dump all mach-o load commands'''
1703            self.mach.dump_load_commands(True, self.options)
1704            return False
1705
1706        def do_sections(self, line):
1707            '''Dump all mach-o sections'''
1708            self.mach.dump_sections(True, self.options)
1709            return False
1710
1711        def do_symtab(self, line):
1712            '''Dump all mach-o symbols in the symbol table'''
1713            self.mach.dump_symtab(True, self.options)
1714            return False
1715
if __name__ == '__main__':
    # Command line driver: parse the options, then either compare two files
    # or dump (or interactively explore) each file named on the command line.
    parser = optparse.OptionParser(
        description='A script that parses skinny and universal mach-o files.')
    parser.add_option(
        '--arch',
        '-a',
        type='string',
        metavar='arch',
        dest='archs',
        action='append',
        help='specify one or more architectures by name')
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    parser.add_option(
        '-H',
        '--header',
        action='store_true',
        dest='dump_header',
        help='dump the mach-o file header',
        default=False)
    parser.add_option(
        '-l',
        '--load-commands',
        action='store_true',
        dest='dump_load_commands',
        help='dump the mach-o load commands',
        default=False)
    parser.add_option(
        '-s',
        '--symtab',
        action='store_true',
        dest='dump_symtab',
        help='dump the mach-o symbol table',
        default=False)
    parser.add_option(
        '-S',
        '--sections',
        action='store_true',
        dest='dump_sections',
        help='dump the mach-o sections',
        default=False)
    parser.add_option(
        '--section',
        type='string',
        metavar='sectname',
        dest='section_names',
        action='append',
        help='Specify one or more section names to dump',
        default=[])
    parser.add_option(
        '-o',
        '--out',
        type='string',
        dest='outfile',
        help='Used in conjunction with the --section=NAME option to save a single section\'s data to disk.',
        default=False)
    parser.add_option(
        '-i',
        '--interactive',
        action='store_true',
        dest='interactive',
        help='enable interactive mode',
        default=False)
    parser.add_option(
        '-m',
        '--mangled',
        action='store_true',
        dest='find_mangled',
        help='dump all mangled names in a mach file',
        default=False)
    parser.add_option(
        '-c',
        '--compare',
        action='store_true',
        dest='compare',
        help='compare two mach files',
        default=False)
    parser.add_option(
        '-M',
        '--extract-modules',
        action='store_true',
        dest='extract_modules',
        help='Extract modules from file',
        default=False)
    parser.add_option(
        '-C',
        '--count',
        type='int',
        dest='max_count',
        help='Sets the max byte count when dumping section data',
        default=-1)

    (options, mach_files) = parser.parse_args()
    if options.extract_modules:
        # --extract-modules picks its own section, so it cannot be combined
        # with --section, and it needs an output file to write to.
        # sys.exit() is used instead of the bare exit() helper, which is only
        # installed by the site module.
        if options.section_names:
            print("error: can't use --section option with the --extract-modules option")
            sys.exit(1)
        if not options.outfile:
            # The option is registered as -o/--out, so name it that way.
            print("error: the --out=FILE option must be specified with the --extract-modules option")
            sys.exit(1)
        options.section_names.append("__apple_ast")
    if options.compare:
        if len(mach_files) == 2:
            mach_a = Mach()
            mach_b = Mach()
            mach_a.parse(mach_files[0])
            mach_b.parse(mach_files[1])
            mach_a.compare(mach_b)
        else:
            print('error: --compare takes two mach files as arguments')
            # Report failure to the caller, like the other error paths do.
            sys.exit(1)
    else:
        # With no dump option selected, default to header + load commands.
        if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names):
            options.dump_header = True
            options.dump_load_commands = True
        if options.verbose:
            print('options', options)
            print('mach_files', mach_files)
        for path in mach_files:
            mach = Mach()
            mach.parse(path)
            if options.interactive:
                interpreter = Mach.Interactive(mach, options)
                interpreter.cmdloop()
            else:
                mach.dump(options)
1846