1#!/usr/bin/env python 2 3import cmd 4import dict_utils 5import file_extract 6import optparse 7import re 8import struct 9import string 10import io 11import sys 12import uuid 13 14# Mach header "magic" constants 15MH_MAGIC = 0xfeedface 16MH_CIGAM = 0xcefaedfe 17MH_MAGIC_64 = 0xfeedfacf 18MH_CIGAM_64 = 0xcffaedfe 19FAT_MAGIC = 0xcafebabe 20FAT_CIGAM = 0xbebafeca 21 22# Mach haeder "filetype" constants 23MH_OBJECT = 0x00000001 24MH_EXECUTE = 0x00000002 25MH_FVMLIB = 0x00000003 26MH_CORE = 0x00000004 27MH_PRELOAD = 0x00000005 28MH_DYLIB = 0x00000006 29MH_DYLINKER = 0x00000007 30MH_BUNDLE = 0x00000008 31MH_DYLIB_STUB = 0x00000009 32MH_DSYM = 0x0000000a 33MH_KEXT_BUNDLE = 0x0000000b 34 35# Mach haeder "flag" constant bits 36MH_NOUNDEFS = 0x00000001 37MH_INCRLINK = 0x00000002 38MH_DYLDLINK = 0x00000004 39MH_BINDATLOAD = 0x00000008 40MH_PREBOUND = 0x00000010 41MH_SPLIT_SEGS = 0x00000020 42MH_LAZY_INIT = 0x00000040 43MH_TWOLEVEL = 0x00000080 44MH_FORCE_FLAT = 0x00000100 45MH_NOMULTIDEFS = 0x00000200 46MH_NOFIXPREBINDING = 0x00000400 47MH_PREBINDABLE = 0x00000800 48MH_ALLMODSBOUND = 0x00001000 49MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000 50MH_CANONICAL = 0x00004000 51MH_WEAK_DEFINES = 0x00008000 52MH_BINDS_TO_WEAK = 0x00010000 53MH_ALLOW_STACK_EXECUTION = 0x00020000 54MH_ROOT_SAFE = 0x00040000 55MH_SETUID_SAFE = 0x00080000 56MH_NO_REEXPORTED_DYLIBS = 0x00100000 57MH_PIE = 0x00200000 58MH_DEAD_STRIPPABLE_DYLIB = 0x00400000 59MH_HAS_TLV_DESCRIPTORS = 0x00800000 60MH_NO_HEAP_EXECUTION = 0x01000000 61 62# Mach load command constants 63LC_REQ_DYLD = 0x80000000 64LC_SEGMENT = 0x00000001 65LC_SYMTAB = 0x00000002 66LC_SYMSEG = 0x00000003 67LC_THREAD = 0x00000004 68LC_UNIXTHREAD = 0x00000005 69LC_LOADFVMLIB = 0x00000006 70LC_IDFVMLIB = 0x00000007 71LC_IDENT = 0x00000008 72LC_FVMFILE = 0x00000009 73LC_PREPAGE = 0x0000000a 74LC_DYSYMTAB = 0x0000000b 75LC_LOAD_DYLIB = 0x0000000c 76LC_ID_DYLIB = 0x0000000d 77LC_LOAD_DYLINKER = 0x0000000e 78LC_ID_DYLINKER = 0x0000000f 79LC_PREBOUND_DYLIB = 
0x00000010 80LC_ROUTINES = 0x00000011 81LC_SUB_FRAMEWORK = 0x00000012 82LC_SUB_UMBRELLA = 0x00000013 83LC_SUB_CLIENT = 0x00000014 84LC_SUB_LIBRARY = 0x00000015 85LC_TWOLEVEL_HINTS = 0x00000016 86LC_PREBIND_CKSUM = 0x00000017 87LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD 88LC_SEGMENT_64 = 0x00000019 89LC_ROUTINES_64 = 0x0000001a 90LC_UUID = 0x0000001b 91LC_RPATH = 0x0000001c | LC_REQ_DYLD 92LC_CODE_SIGNATURE = 0x0000001d 93LC_SEGMENT_SPLIT_INFO = 0x0000001e 94LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD 95LC_LAZY_LOAD_DYLIB = 0x00000020 96LC_ENCRYPTION_INFO = 0x00000021 97LC_DYLD_INFO = 0x00000022 98LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD 99LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD 100LC_VERSION_MIN_MACOSX = 0x00000024 101LC_VERSION_MIN_IPHONEOS = 0x00000025 102LC_FUNCTION_STARTS = 0x00000026 103LC_DYLD_ENVIRONMENT = 0x00000027 104 105# Mach CPU constants 106CPU_ARCH_MASK = 0xff000000 107CPU_ARCH_ABI64 = 0x01000000 108CPU_TYPE_ANY = 0xffffffff 109CPU_TYPE_VAX = 1 110CPU_TYPE_MC680x0 = 6 111CPU_TYPE_I386 = 7 112CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64 113CPU_TYPE_MIPS = 8 114CPU_TYPE_MC98000 = 10 115CPU_TYPE_HPPA = 11 116CPU_TYPE_ARM = 12 117CPU_TYPE_MC88000 = 13 118CPU_TYPE_SPARC = 14 119CPU_TYPE_I860 = 15 120CPU_TYPE_ALPHA = 16 121CPU_TYPE_POWERPC = 18 122CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 123 124# VM protection constants 125VM_PROT_READ = 1 126VM_PROT_WRITE = 2 127VM_PROT_EXECUTE = 4 128 129# VM protection constants 130N_STAB = 0xe0 131N_PEXT = 0x10 132N_TYPE = 0x0e 133N_EXT = 0x01 134 135# Values for nlist N_TYPE bits of the "Mach.NList.type" field. 
def dump_memory(base_addr, data, hex_bytes_len, num_per_line):
    '''Print a hex dump (address, hex bytes, ASCII column) of "data".

    base_addr     -- address displayed for the first byte of "data".
    data          -- bytes-like object to dump. A str is accepted too and is
                     treated as one byte per character (latin-1).
    hex_bytes_len -- how much to dump, counted in hex digits (two per byte),
                     or -1 to dump everything. Counts that run past the end
                     of "data" are clamped instead of raising.
    num_per_line  -- number of bytes shown on each output line.
    '''
    if isinstance(data, str):
        data = data.encode('latin-1')
    raw = bytes(data)
    if hex_bytes_len == -1:
        byte_count = len(raw)
    else:
        # The limit is expressed in hex digits; an odd count still shows the
        # byte that the last digit belongs to.
        byte_count = max(0, min((hex_bytes_len + 1) // 2, len(raw)))

    ascii_str = ''
    for index in range(byte_count):
        if index % num_per_line == 0:
            if index > 0:
                print(' %s' % (ascii_str))
                ascii_str = ''
            # Label each line with the address of its first byte (the byte
            # index, not the index into the hex digit string).
            print('0x%8.8x:' % (base_addr + index), end=' ')
        byte = raw[index]
        print('%2.2x' % byte, end=' ')
        ascii_str += chr(byte) if 32 <= byte < 127 else '.'
    if ascii_str:
        # Pad a short final line so the ASCII column stays aligned.
        remainder = byte_count % num_per_line
        padding = num_per_line - remainder if remainder else 0
        print('%*s%s' % (padding * 3 + 1, '', ascii_str))
    print()


class TerminalColors:
    '''Simple terminal colors class.

    Every method returns the ANSI escape sequence for the requested
    attribute, or an empty string when the instance is disabled.'''

    def __init__(self, enabled=True):
        # TODO: discover terminal type from "file" and disable if
        # it can't handle the color codes
        self.enabled = enabled

    def _sgr(self, code):
        '''Return the escape sequence for SGR "code" ('' when disabled).'''
        if self.enabled:
            return '\x1b[%um' % code
        return ''

    def _color(self, index, fg):
        '''Return the sequence for color "index" (0-7, or 9 for default).
        Foreground codes start at 30, background codes at 40.'''
        return self._sgr((30 if fg else 40) + index)

    def reset(self):
        '''Reset all terminal colors and formatting.'''
        return self._sgr(0)

    def bold(self, on=True):
        '''Enable or disable bold depending on the "on" parameter.'''
        return self._sgr(1 if on else 22)

    def italics(self, on=True):
        '''Enable or disable italics depending on the "on" parameter.'''
        return self._sgr(3 if on else 23)

    def underline(self, on=True):
        '''Enable or disable underline depending on the "on" parameter.'''
        return self._sgr(4 if on else 24)

    def inverse(self, on=True):
        '''Enable or disable inverse depending on the "on" parameter.'''
        return self._sgr(7 if on else 27)

    def strike(self, on=True):
        '''Enable or disable strike through depending on the "on" parameter.'''
        return self._sgr(9 if on else 29)

    def black(self, fg=True):
        '''Set the foreground or background color to black.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(0, fg)

    def red(self, fg=True):
        '''Set the foreground or background color to red.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(1, fg)

    def green(self, fg=True):
        '''Set the foreground or background color to green.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(2, fg)

    def yellow(self, fg=True):
        '''Set the foreground or background color to yellow.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        # Foreground yellow is 33 and background yellow is 43, following the
        # same 3x/4x pairing as every other color in this class.
        return self._color(3, fg)

    def blue(self, fg=True):
        '''Set the foreground or background color to blue.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(4, fg)

    def magenta(self, fg=True):
        '''Set the foreground or background color to magenta.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(5, fg)

    def cyan(self, fg=True):
        '''Set the foreground or background color to cyan.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(6, fg)

    def white(self, fg=True):
        '''Set the foreground or background color to white.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(7, fg)

    def default(self, fg=True):
        '''Set the foreground or background color to the default.
        The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.'''
        return self._color(9, fg)


def swap_unpack_char():
    """Return the struct prefix character for the non-native byte order.

    Returns '<' on a big endian host and '>' on a little endian host."""
    # struct.pack() returns bytes, so the prefix being tested must be bytes.
    if struct.pack('H', 1).startswith(b"\x00"):
        return '<'
    return '>'


def _byte_values(s):
    '''Return the items of "s" as a list of integers.

    Iterating bytes/bytearray yields ints while iterating a str yields
    one-character strings; both are normalized to integer values.'''
    return [item if isinstance(item, int) else ord(item) for item in s]


def dump_hex_bytes(addr, s, bytes_per_line=16):
    '''Print "s" as upper case hex bytes, "bytes_per_line" per line.

    addr -- address shown at the start of the first line; each following
            line is labelled with the address of its first byte.
    s    -- bytes-like object or str to dump.
    '''
    line = ''
    for i, value in enumerate(_byte_values(s)):
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        line += "%02X " % value
    print(line)


def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16):
    '''Print the bytes of "a" as hex, coloring in red every position where
    "a" and "b" differ.

    When "b" is longer than "a" the extra bytes of "b" are shown (in red).

    addr -- address shown at the start of the first line.
    a, b -- bytes-like objects or strs to compare.
    '''
    a_values = _byte_values(a)
    b_values = _byte_values(b)
    a_len = len(a_values)
    b_len = len(b_values)
    tty_colors = TerminalColors(True)
    line = ''
    for i in range(max(a_len, b_len)):
        value_a = a_values[i] if i < a_len else None
        value_b = b_values[i] if i < b_len else None
        # Prefer the byte from "a"; compare against None explicitly so that
        # a zero byte in "a" is not mistaken for "missing".
        shown = value_a if value_a is not None else value_b
        mismatch = value_a != value_b
        if (i % bytes_per_line) == 0:
            if line:
                print(line)
            line = '%#8.8x: ' % (addr + i)
        if mismatch:
            line += tty_colors.red()
        line += "%02X " % shown
        if mismatch:
            line += tty_colors.default()

    print(line)
c, s): 436 self.cpu = c 437 self.sub = s 438 439 def is_64_bit(self): 440 return (self.cpu & CPU_ARCH_ABI64) != 0 441 442 cpu_infos = [ 443 ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY], 444 ["arm", CPU_TYPE_ARM, 0], 445 ["armv4", CPU_TYPE_ARM, 5], 446 ["armv6", CPU_TYPE_ARM, 6], 447 ["armv5", CPU_TYPE_ARM, 7], 448 ["xscale", CPU_TYPE_ARM, 8], 449 ["armv7", CPU_TYPE_ARM, 9], 450 ["armv7f", CPU_TYPE_ARM, 10], 451 ["armv7s", CPU_TYPE_ARM, 11], 452 ["armv7k", CPU_TYPE_ARM, 12], 453 ["armv7m", CPU_TYPE_ARM, 15], 454 ["armv7em", CPU_TYPE_ARM, 16], 455 ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY], 456 ["ppc", CPU_TYPE_POWERPC, 0], 457 ["ppc601", CPU_TYPE_POWERPC, 1], 458 ["ppc602", CPU_TYPE_POWERPC, 2], 459 ["ppc603", CPU_TYPE_POWERPC, 3], 460 ["ppc603e", CPU_TYPE_POWERPC, 4], 461 ["ppc603ev", CPU_TYPE_POWERPC, 5], 462 ["ppc604", CPU_TYPE_POWERPC, 6], 463 ["ppc604e", CPU_TYPE_POWERPC, 7], 464 ["ppc620", CPU_TYPE_POWERPC, 8], 465 ["ppc750", CPU_TYPE_POWERPC, 9], 466 ["ppc7400", CPU_TYPE_POWERPC, 10], 467 ["ppc7450", CPU_TYPE_POWERPC, 11], 468 ["ppc970", CPU_TYPE_POWERPC, 100], 469 ["ppc64", CPU_TYPE_POWERPC64, 0], 470 ["ppc970-64", CPU_TYPE_POWERPC64, 100], 471 ["i386", CPU_TYPE_I386, 3], 472 ["i486", CPU_TYPE_I386, 4], 473 ["i486sx", CPU_TYPE_I386, 0x84], 474 ["i386", CPU_TYPE_I386, CPU_TYPE_ANY], 475 ["x86_64", CPU_TYPE_X86_64, 3], 476 ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY], 477 ] 478 479 def __str__(self): 480 for info in self.cpu_infos: 481 if self.cpu == info[1] and (self.sub & 0x00ffffff) == info[2]: 482 return info[0] 483 return "{0}.{1}".format(self.cpu, self.sub) 484 485 class Magic(dict_utils.Enum): 486 487 enum = { 488 'MH_MAGIC': MH_MAGIC, 489 'MH_CIGAM': MH_CIGAM, 490 'MH_MAGIC_64': MH_MAGIC_64, 491 'MH_CIGAM_64': MH_CIGAM_64, 492 'FAT_MAGIC': FAT_MAGIC, 493 'FAT_CIGAM': FAT_CIGAM 494 } 495 496 def __init__(self, initial_value=0): 497 dict_utils.Enum.__init__(self, initial_value, self.enum) 498 499 def is_skinny_mach_file(self): 500 return self.value == MH_MAGIC or 
def parse(self, path):
    """Open the mach-o file at "path" and unpack it into this object.

    I/O and value errors are reported on stdout rather than raised; any
    other exception is reported and then re-raised.
    """
    self.path = path
    f = None
    keep_open = False
    try:
        # Mach-o files are binary; text mode would try to decode the bytes.
        f = open(self.path, 'rb')
        file_extractor = file_extract.FileExtract(f, '=')
        self.unpack(file_extractor)
        # On success the file is deliberately left open: the unpacked
        # content keeps the extractor and seeks/reads from it later (for
        # example when the symbol table is first requested).
        keep_open = True
    except IOError as err:
        print("I/O error({0}): {1}".format(err.errno, err.strerror))
    except ValueError:
        print("Could not convert data to an integer.")
    except BaseException:
        print("Unexpected error:", sys.exc_info()[0])
        raise
    finally:
        # Do not leak the handle when parsing failed part way through.
        if f is not None and not keep_open:
            f.close()
class Universal:
    """A universal ("fat") mach-o file.

    Holds the table of architecture entries read from the fat header; each
    entry wraps the skinny mach-o image found at that entry's file offset.
    """

    def __init__(self, path):
        self.path = path
        self.type = 'universal'
        self.file_off = 0
        self.magic = None
        self.nfat_arch = 0
        self.archs = []

    def description(self):
        """Return "<file offset>: <path> (<arch>, <arch>, ...)"."""
        arch_names = ', '.join('%s' % info.arch for info in self.archs)
        return '%#8.8x: %s (%s)' % (self.file_off, self.path, arch_names)

    def unpack(self, data, magic=None):
        """Read the fat header, every arch entry and every contained image.

        data  -- extractor positioned at the fat header, or just past its
                 magic when "magic" is supplied.
        magic -- the already decoded magic, or None to read it from "data".
        """
        self.file_off = data.tell()
        if magic is None:
            self.magic = Mach.Magic()
            self.magic.unpack(data)
        else:
            self.magic = magic
            # The caller already consumed the 4 byte magic.
            self.file_off = self.file_off - 4
        # Universal headers are always in big endian
        data.set_byte_order('big')
        self.nfat_arch = data.get_uint32()
        # Start from a clean table so unpacking twice cannot mix entries.
        self.archs = []
        for _ in range(self.nfat_arch):
            info = self.ArchInfo()
            info.unpack(data)
            self.archs.append(info)
        for info in self.archs:
            info.mach = Mach.Skinny(self.path)
            data.seek(info.offset, 0)
            skinny_magic = Mach.Magic()
            skinny_magic.unpack(data)
            info.mach.unpack(data, skinny_magic)

    def compare(self, rhs):
        """Comparison is not implemented; report that and return False."""
        print('error: comparing two universal files is not supported yet')
        return False

    def dump(self, options):
        """Dump the arch table (when options.dump_header is set) and then
        every contained image according to "options"."""
        if options.dump_header:
            print()
            print("Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch))
            print()
        if self.archs:
            if options.dump_header:
                self.archs[0].dump_header(True, options)
                for info in self.archs:
                    info.dump_flat(options)
                print()
            for info in self.archs:
                info.mach.dump(options)

    def dump_header(self, dump_description=True, options=None):
        """Dump the mach header of every contained image."""
        if dump_description:
            print(self.description())
        for info in self.archs:
            info.mach.dump_header(True, options)
            print()

    def dump_load_commands(self, dump_description=True, options=None):
        """Dump the load commands of every contained image."""
        if dump_description:
            print(self.description())
        for info in self.archs:
            info.mach.dump_load_commands(True, options)
            print()

    def dump_sections(self, dump_description=True, options=None):
        """Dump the section table of every contained image."""
        if dump_description:
            print(self.description())
        for info in self.archs:
            info.mach.dump_sections(True, options)
            print()

    def dump_section_contents(self, options):
        """Dump the requested section contents of every contained image."""
        for info in self.archs:
            info.mach.dump_section_contents(options)
            print()

    def dump_symtab(self, dump_description=True, options=None):
        """Dump the symbol table of every contained image."""
        if dump_description:
            print(self.description())
        for info in self.archs:
            info.mach.dump_symtab(True, options)
            print()

    def dump_symbol_names_matching_regex(self, regex, file=None):
        """Forward the symbol name search to every contained image."""
        for info in self.archs:
            info.mach.dump_symbol_names_matching_regex(regex, file)

    class ArchInfo:
        """One entry of the fat header's architecture table."""

        def __init__(self):
            self.arch = Mach.Arch(0, 0)
            self.offset = 0
            self.size = 0
            self.align = 0
            self.mach = None

        def unpack(self, data):
            """Read cputype, cpusubtype, offset, size and align."""
            # Universal headers are always in big endian
            data.set_byte_order('big')
            (self.arch.cpu, self.arch.sub, self.offset, self.size,
             self.align) = data.get_n_uint32(5)

        def dump_header(self, dump_description=True, options=None):
            """Print the column header matching the rows of dump_flat().

            "options" may be None, which selects the non-verbose layout.
            """
            if options is not None and options.verbose:
                columns = ('CPU', 'SUBTYPE', 'OFFSET', 'SIZE', 'ALIGN')
            else:
                columns = ('ARCH', 'FILEOFFSET', 'FILESIZE', 'ALIGN')
            # Every column printed by dump_flat() is 10 characters wide, so
            # pad the titles to the same width to keep them over their data.
            print(' '.join('%-10s' % title for title in columns).rstrip())
            print(' '.join(['-' * 10] * len(columns)))

        def dump_flat(self, options):
            """Print this entry as one table row (see dump_header())."""
            if options is not None and options.verbose:
                print("%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align))
            else:
                print("%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align))

        def dump(self):
            """Print every field of this entry on its own line."""
            print(" cputype: %#8.8x" % self.arch.cpu)
            print("cpusubtype: %#8.8x" % self.arch.sub)
            print(" offset: %#8.8x" % self.offset)
            print(" size: %#8.8x" % self.size)
            print(" align: %#8.8x" % self.align)

        def __str__(self):
            return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (
                self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)

        # The debugging representation is the same text as str().
        __repr__ = __str__
class Skinny:
    """A single architecture ("skinny") mach-o image.

    Holds the decoded mach header, the load commands and the lazily read
    symbol table of one image, plus helpers that print them.
    """

    # (title, width) pairs for the one-line-per-image table printed by
    # dump_flat(); the widths are those of the dash rulers under the titles.
    _FLAT_COLUMNS_VERBOSE = (
        ('MAGIC', 10), ('CPU', 10), ('SUBTYPE', 10), ('FILETYPE', 10),
        ('NUM CMDS', 8), ('SIZE CMDS', 10), ('FLAGS', 10),
    )
    _FLAT_COLUMNS = (
        ('MAGIC', 12), ('ARCH', 10), ('FILETYPE', 14),
        ('NUM CMDS', 8), ('SIZE CMDS', 10), ('FLAGS', 10),
    )

    def __init__(self, path):
        self.path = path
        self.type = 'skinny'
        self.data = None
        self.file_off = 0
        self.magic = 0
        self.arch = Mach.Arch(0, 0)
        self.filetype = Mach.FileType(0)
        self.ncmds = 0
        self.sizeofcmds = 0
        self.flags = Mach.Flags(0)
        self.uuid = None
        self.commands = []
        self.segments = []
        self.sections = []
        self.symbols = []
        # Index 0 is a placeholder so real sections start at index 1
        # (every consumer in this class skips sections[0]).
        self.sections.append(Mach.Section())

    def description(self):
        """Return "<file offset>: <path> (<arch>)"."""
        return '%#8.8x: %s (%s)' % (self.file_off, self.path, self.arch)

    def unpack(self, data, magic=None):
        """Read the mach header and all load commands from "data".

        data  -- extractor positioned at the mach header, or just past its
                 magic when "magic" is supplied. It is kept for later reads.
        magic -- the already decoded magic, or None to read it from "data".
        """
        self.data = data
        self.file_off = data.tell()
        if magic is None:
            self.magic = Mach.Magic()
            self.magic.unpack(data)
        else:
            self.magic = magic
            # The caller already consumed the 4 byte magic.
            self.file_off = self.file_off - 4
        data.set_byte_order(self.magic.get_byte_order())
        (self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds,
         self.sizeofcmds, bits) = data.get_n_uint32(6)
        self.flags.bits = bits

        if self.is_64_bit():
            data.get_uint32()  # Skip reserved word in mach_header_64

        for _ in range(self.ncmds):
            self.commands.append(self.unpack_load_command(data))

    def get_data(self):
        """Return the extractor set to this image's byte order, or None."""
        if self.data:
            self.data.set_byte_order(self.magic.get_byte_order())
            return self.data
        return None

    def unpack_load_command(self, data):
        """Read one load command and leave "data" just past it.

        The generic header is read first; commands with a known layout are
        then re-read through their specialized class.
        """
        lc = Mach.LoadCommand()
        lc.unpack(self, data)
        lc_command = lc.command.get_enum_value()
        # The specialized classes are looked up only in the branch that
        # needs them, exactly as many lookups as the command requires.
        if lc_command in (LC_SEGMENT, LC_SEGMENT_64):
            specialized = Mach.SegmentLoadCommand
        elif lc_command in (LC_LOAD_DYLIB, LC_ID_DYLIB,
                            LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB):
            specialized = Mach.DylibLoadCommand
        elif lc_command in (LC_LOAD_DYLINKER, LC_SUB_FRAMEWORK,
                            LC_SUB_CLIENT, LC_SUB_UMBRELLA, LC_SUB_LIBRARY,
                            LC_ID_DYLINKER, LC_RPATH):
            specialized = Mach.LoadDYLDLoadCommand
        elif lc_command == LC_DYLD_INFO_ONLY:
            specialized = Mach.DYLDInfoOnlyLoadCommand
        elif lc_command == LC_SYMTAB:
            specialized = Mach.SymtabLoadCommand
        elif lc_command == LC_DYSYMTAB:
            specialized = Mach.DYLDSymtabLoadCommand
        elif lc_command == LC_UUID:
            specialized = Mach.UUIDLoadCommand
        elif lc_command in (LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO,
                            LC_FUNCTION_STARTS):
            specialized = Mach.DataBlobLoadCommand
        elif lc_command == LC_UNIXTHREAD:
            specialized = Mach.UnixThreadLoadCommand
        elif lc_command == LC_ENCRYPTION_INFO:
            specialized = Mach.EncryptionInfoLoadCommand
        else:
            specialized = None
        if specialized is not None:
            lc = specialized(lc)
            lc.unpack(self, data)
        # Seek to the end of the command regardless of how much was parsed.
        lc.skip(data)
        return lc

    def compare(self, rhs):
        """Compare the section contents of this image with those of "rhs".

        Progress and differences are printed. Returns True only when every
        section of this image exists in "rhs" with identical contents.
        """
        print("\nComparing:")
        print("a) %s %s" % (self.arch, self.path))
        print("b) %s %s" % (rhs.arch, rhs.path))
        result = True
        if self.type == rhs.type:
            for lhs_section in self.sections[1:]:
                rhs_section = rhs.get_section_by_section(lhs_section)
                if not rhs_section:
                    result = False
                    print('error: section %s is missing in %s' % (lhs_section.sectname, rhs.path))
                    continue
                print('comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname), end=' ')
                sys.stdout.flush()
                lhs_data = lhs_section.get_contents(self)
                rhs_data = rhs_section.get_contents(rhs)
                if lhs_data and rhs_data:
                    if lhs_data == rhs_data:
                        print('ok')
                    else:
                        # TODO: check padding when one side is a prefix of
                        # the other.
                        result = False
                        print('error: sections differ')
                elif lhs_data and not rhs_data:
                    print('error: section data missing from b:')
                    print('a) %s' % (lhs_section))
                    print('b) %s' % (rhs_section))
                    result = False
                elif not lhs_data and rhs_data:
                    print('error: section data missing from a:')
                    print('a) %s' % (lhs_section))
                    print('b) %s' % (rhs_section))
                    result = False
                elif lhs_section.offset or rhs_section.offset:
                    print('error: section data missing for both a and b:')
                    print('a) %s' % (lhs_section))
                    print('b) %s' % (rhs_section))
                    result = False
                else:
                    print('ok')
        else:
            print('error: comparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type))
            result = False
        if not result:
            print('error: mach files differ')
        return result

    def dump_flat_header(self, dump_description=True, options=None):
        """Print the column titles matching the rows of dump_flat().

        This table header used to be a first definition of dump_header()
        that the later, detailed dump_header() silently replaced; it now
        has its own name so it can actually be called.
        """
        if options is not None and options.verbose:
            columns = self._FLAT_COLUMNS_VERBOSE
        else:
            columns = self._FLAT_COLUMNS
        print(' '.join(title.ljust(width) for title, width in columns).rstrip())
        print(' '.join('-' * width for _, width in columns))

    def dump_flat(self, options):
        """Print the mach header as one table row."""
        if options is not None and options.verbose:
            # "%x" needs an integer: use the numeric value of the magic
            # (self.magic is a plain 0 until unpack() has run).
            magic_value = getattr(self.magic, 'value', self.magic)
            print("%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (magic_value, self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits))
        else:
            print("%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags))

    def dump(self, options):
        """Dump every part of the image that "options" asks for."""
        if options.dump_header:
            self.dump_header(True, options)
        if options.dump_load_commands:
            self.dump_load_commands(False, options)
        if options.dump_sections:
            self.dump_sections(False, options)
        if options.section_names:
            self.dump_section_contents(options)
        if options.dump_symtab:
            self.get_symtab()
            if self.symbols:
                # Print the symbols that were asked for (this used to print
                # the section table a second time).
                self.dump_symtab(False, options)
            else:
                print("No symbols")
        if options.find_mangled:
            self.dump_symbol_names_matching_regex(re.compile('^_?_Z'))

    def dump_header(self, dump_description=True, options=None):
        """Print every field of the mach header on its own line."""
        if dump_description:
            print(self.description())
        print("Mach Header")
        print(" magic: %#8.8x %s" % (self.magic.value, self.magic))
        print(" cputype: %#8.8x %s" % (self.arch.cpu, self.arch))
        print(" cpusubtype: %#8.8x" % self.arch.sub)
        print(" filetype: %#8.8x %s" % (self.filetype.get_enum_value(), self.filetype.get_enum_name()))
        print(" ncmds: %#8.8x %u" % (self.ncmds, self.ncmds))
        print(" sizeofcmds: %#8.8x" % self.sizeofcmds)
        print(" flags: %#8.8x %s" % (self.flags.bits, self.flags))

    def dump_load_commands(self, dump_description=True, options=None):
        """Print one line per load command."""
        if dump_description:
            print(self.description())
        for lc in self.commands:
            print(lc)

    def get_section_by_name(self, name):
        """Return the first section whose sectname is "name", or None."""
        for section in self.sections:
            if section.sectname and section.sectname == name:
                return section
        return None

    def get_section_by_section(self, other_section):
        """Return the section with the same section and segment name as
        "other_section", or None."""
        for section in self.sections:
            if section.sectname == other_section.sectname and section.segname == other_section.segname:
                return section
        return None

    def dump_sections(self, dump_description=True, options=None):
        """Print the section table, skipping the placeholder at index 0."""
        if dump_description:
            print(self.description())
        real_sections = self.sections[1:]
        if real_sections:
            real_sections[0].dump_header()
            for section in real_sections:
                print("%s" % section)

    def dump_section_contents(self, options):
        """Dump, or save to options.outfile, each section named in
        options.section_names.

        Only the first section found is written to options.outfile; later
        ones are reported as skipped.
        """
        saved_section_to_disk = False
        for sectname in options.section_names:
            section = self.get_section_by_name(sectname)
            if not section:
                print('error: no section named "%s" was found' % (sectname))
                continue
            sect_bytes = section.get_contents(self)
            if not options.outfile:
                print('section %s:\n' % (sectname))
                section.dump_header()
                print('%s\n' % (section))
                dump_memory(0, sect_bytes, options.max_count, 16)
                continue
            if saved_section_to_disk:
                print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname))
                continue
            # Section contents are raw bytes, so write in binary mode.
            with open(options.outfile, 'wb') as outfile:
                if options.extract_modules:
                    data = file_extract.FileExtract(
                        io.BytesIO(sect_bytes), self.data.byte_order)
                    # The values below are not all used, but every read
                    # advances the extractor and must stay in this order.
                    version = data.get_uint32()
                    num_modules = data.get_uint32()
                    for _ in range(num_modules):
                        # NOTE(review): each record is read from wherever
                        # the previous module's data ended; confirm this
                        # matches the module table layout.
                        data_offset = data.get_uint64()
                        data_size = data.get_uint64()
                        name_offset = data.get_uint32()
                        language = data.get_uint32()
                        flags = data.get_uint32()
                        data.seek(name_offset)
                        module_name = data.get_c_string()
                        data.seek(data_offset)
                        outfile.write(data.read_size(data_size))
                else:
                    print("Saving section %s to '%s'" % (sectname, options.outfile))
                    outfile.write(sect_bytes)
            saved_section_to_disk = True

    def get_segment(self, segname):
        """Return the segment named "segname", or None.

        An image whose only segment has an empty name returns that segment
        for any "segname".
        """
        if len(self.segments) == 1 and self.segments[0].segname == '':
            return self.segments[0]
        for segment in self.segments:
            if segment.segname == segname:
                return segment
        return None

    def get_first_load_command(self, lc_enum_value):
        """Return the first load command whose value is "lc_enum_value",
        or None."""
        for lc in self.commands:
            if lc.command.value == lc_enum_value:
                return lc
        return None

    def get_symtab(self):
        """Read the symbol table into self.symbols if not already done."""
        if self.data and not self.symbols:
            lc_symtab = self.get_first_load_command(LC_SYMTAB)
            if lc_symtab:
                symtab_offset = self.file_off
                if self.data.is_in_memory():
                    linkedit_segment = self.get_segment('__LINKEDIT')
                    if linkedit_segment:
                        # Translate the symbol table's file offset into an
                        # address inside the loaded __LINKEDIT segment.
                        # (This value used to be overwritten right away
                        # with the string table's address, so the nlist
                        # entries were read from the wrong place.)
                        symtab_offset = (linkedit_segment.vmaddr +
                                         lc_symtab.symoff -
                                         linkedit_segment.fileoff)
                else:
                    symtab_offset += lc_symtab.symoff

                self.data.seek(symtab_offset)
                for _ in range(lc_symtab.nsyms):
                    nlist = Mach.NList()
                    nlist.unpack(self, self.data, lc_symtab)
                    self.symbols.append(nlist)
            else:
                print("no LC_SYMTAB")

    def dump_symtab(self, dump_description=True, options=None):
        """Print every symbol, prefixed with its index."""
        self.get_symtab()
        if dump_description:
            print(self.description())
        for i, symbol in enumerate(self.symbols):
            print('[%5u] %s' % (i, symbol))

    def dump_symbol_names_matching_regex(self, regex, file=None):
        """Print (and write to "file" when given) every symbol name that
        the compiled "regex" matches."""
        self.get_symtab()
        for symbol in self.symbols:
            if symbol.name and regex.search(symbol.name):
                print(symbol.name)
                if file:
                    file.write('%s\n' % (symbol.name))

    def is_64_bit(self):
        """Return True when the magic identifies a 64 bit image."""
        return self.magic.is_64_bit()
LC_SUB_FRAMEWORK, 1170 'LC_SUB_UMBRELLA': LC_SUB_UMBRELLA, 1171 'LC_SUB_CLIENT': LC_SUB_CLIENT, 1172 'LC_SUB_LIBRARY': LC_SUB_LIBRARY, 1173 'LC_TWOLEVEL_HINTS': LC_TWOLEVEL_HINTS, 1174 'LC_PREBIND_CKSUM': LC_PREBIND_CKSUM, 1175 'LC_LOAD_WEAK_DYLIB': LC_LOAD_WEAK_DYLIB, 1176 'LC_SEGMENT_64': LC_SEGMENT_64, 1177 'LC_ROUTINES_64': LC_ROUTINES_64, 1178 'LC_UUID': LC_UUID, 1179 'LC_RPATH': LC_RPATH, 1180 'LC_CODE_SIGNATURE': LC_CODE_SIGNATURE, 1181 'LC_SEGMENT_SPLIT_INFO': LC_SEGMENT_SPLIT_INFO, 1182 'LC_REEXPORT_DYLIB': LC_REEXPORT_DYLIB, 1183 'LC_LAZY_LOAD_DYLIB': LC_LAZY_LOAD_DYLIB, 1184 'LC_ENCRYPTION_INFO': LC_ENCRYPTION_INFO, 1185 'LC_DYLD_INFO': LC_DYLD_INFO, 1186 'LC_DYLD_INFO_ONLY': LC_DYLD_INFO_ONLY, 1187 'LC_LOAD_UPWARD_DYLIB': LC_LOAD_UPWARD_DYLIB, 1188 'LC_VERSION_MIN_MACOSX': LC_VERSION_MIN_MACOSX, 1189 'LC_VERSION_MIN_IPHONEOS': LC_VERSION_MIN_IPHONEOS, 1190 'LC_FUNCTION_STARTS': LC_FUNCTION_STARTS, 1191 'LC_DYLD_ENVIRONMENT': LC_DYLD_ENVIRONMENT 1192 } 1193 1194 def __init__(self, initial_value=0): 1195 dict_utils.Enum.__init__(self, initial_value, self.enum) 1196 1197 def __init__(self, c=None, l=0, o=0): 1198 if c is not None: 1199 self.command = c 1200 else: 1201 self.command = Mach.LoadCommand.Command(0) 1202 self.length = l 1203 self.file_off = o 1204 1205 def unpack(self, mach_file, data): 1206 self.file_off = data.tell() 1207 self.command.value, self.length = data.get_n_uint32(2) 1208 1209 def skip(self, data): 1210 data.seek(self.file_off + self.length, 0) 1211 1212 def __str__(self): 1213 lc_name = self.command.get_enum_name() 1214 return '%#8.8x: <%#4.4x> %-24s' % (self.file_off, 1215 self.length, lc_name) 1216 1217 class Section: 1218 1219 def __init__(self): 1220 self.index = 0 1221 self.is_64 = False 1222 self.sectname = None 1223 self.segname = None 1224 self.addr = 0 1225 self.size = 0 1226 self.offset = 0 1227 self.align = 0 1228 self.reloff = 0 1229 self.nreloc = 0 1230 self.flags = 0 1231 self.reserved1 = 0 1232 self.reserved2 = 0 1233 
self.reserved3 = 0 1234 1235 def unpack(self, is_64, data): 1236 self.is_64 = is_64 1237 self.sectname = data.get_fixed_length_c_string(16, '', True) 1238 self.segname = data.get_fixed_length_c_string(16, '', True) 1239 if self.is_64: 1240 self.addr, self.size = data.get_n_uint64(2) 1241 self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3 = data.get_n_uint32( 1242 8) 1243 else: 1244 self.addr, self.size = data.get_n_uint32(2) 1245 self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2 = data.get_n_uint32( 1246 7) 1247 1248 def dump_header(self): 1249 if self.is_64: 1250 print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME") 1251 print("===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------") 1252 else: 1253 print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME") 1254 print("===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------") 1255 1256 def __str__(self): 1257 if self.is_64: 1258 return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % ( 1259 self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3, self.segname, self.sectname) 1260 else: 1261 return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % ( 1262 self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.segname, self.sectname) 1263 1264 def get_contents(self, mach_file): 1265 '''Get the section contents as a python string''' 1266 if self.size > 0 and mach_file.get_segment( 1267 self.segname).filesize > 0: 1268 data = mach_file.get_data() 
1269 if data: 1270 section_data_offset = mach_file.file_off + self.offset 1271 # print '%s.%s is at offset 0x%x with size 0x%x' % 1272 # (self.segname, self.sectname, section_data_offset, 1273 # self.size) 1274 data.push_offset_and_seek(section_data_offset) 1275 bytes = data.read_size(self.size) 1276 data.pop_offset_and_seek() 1277 return bytes 1278 return None 1279 1280 class DylibLoadCommand(LoadCommand): 1281 1282 def __init__(self, lc): 1283 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1284 self.name = None 1285 self.timestamp = 0 1286 self.current_version = 0 1287 self.compatibility_version = 0 1288 1289 def unpack(self, mach_file, data): 1290 byte_order_char = mach_file.magic.get_byte_order() 1291 name_offset, self.timestamp, self.current_version, self.compatibility_version = data.get_n_uint32( 1292 4) 1293 data.seek(self.file_off + name_offset, 0) 1294 self.name = data.get_fixed_length_c_string(self.length - 24) 1295 1296 def __str__(self): 1297 s = Mach.LoadCommand.__str__(self) 1298 s += "%#8.8x %#8.8x %#8.8x " % (self.timestamp, 1299 self.current_version, 1300 self.compatibility_version) 1301 s += self.name 1302 return s 1303 1304 class LoadDYLDLoadCommand(LoadCommand): 1305 1306 def __init__(self, lc): 1307 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1308 self.name = None 1309 1310 def unpack(self, mach_file, data): 1311 data.get_uint32() 1312 self.name = data.get_fixed_length_c_string(self.length - 12) 1313 1314 def __str__(self): 1315 s = Mach.LoadCommand.__str__(self) 1316 s += "%s" % self.name 1317 return s 1318 1319 class UnixThreadLoadCommand(LoadCommand): 1320 1321 class ThreadState: 1322 1323 def __init__(self): 1324 self.flavor = 0 1325 self.count = 0 1326 self.register_values = list() 1327 1328 def unpack(self, data): 1329 self.flavor, self.count = data.get_n_uint32(2) 1330 self.register_values = data.get_n_uint32(self.count) 1331 1332 def __str__(self): 1333 s = "flavor = %u, count = %u, regs =" % 
( 1334 self.flavor, self.count) 1335 i = 0 1336 for register_value in self.register_values: 1337 if i % 8 == 0: 1338 s += "\n " 1339 s += " %#8.8x" % register_value 1340 i += 1 1341 return s 1342 1343 def __init__(self, lc): 1344 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1345 self.reg_sets = list() 1346 1347 def unpack(self, mach_file, data): 1348 reg_set = Mach.UnixThreadLoadCommand.ThreadState() 1349 reg_set.unpack(data) 1350 self.reg_sets.append(reg_set) 1351 1352 def __str__(self): 1353 s = Mach.LoadCommand.__str__(self) 1354 for reg_set in self.reg_sets: 1355 s += "%s" % reg_set 1356 return s 1357 1358 class DYLDInfoOnlyLoadCommand(LoadCommand): 1359 1360 def __init__(self, lc): 1361 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1362 self.rebase_off = 0 1363 self.rebase_size = 0 1364 self.bind_off = 0 1365 self.bind_size = 0 1366 self.weak_bind_off = 0 1367 self.weak_bind_size = 0 1368 self.lazy_bind_off = 0 1369 self.lazy_bind_size = 0 1370 self.export_off = 0 1371 self.export_size = 0 1372 1373 def unpack(self, mach_file, data): 1374 byte_order_char = mach_file.magic.get_byte_order() 1375 self.rebase_off, self.rebase_size, self.bind_off, self.bind_size, self.weak_bind_off, self.weak_bind_size, self.lazy_bind_off, self.lazy_bind_size, self.export_off, self.export_size = data.get_n_uint32( 1376 10) 1377 1378 def __str__(self): 1379 s = Mach.LoadCommand.__str__(self) 1380 s += "rebase_off = %#8.8x, rebase_size = %u, " % ( 1381 self.rebase_off, self.rebase_size) 1382 s += "bind_off = %#8.8x, bind_size = %u, " % ( 1383 self.bind_off, self.bind_size) 1384 s += "weak_bind_off = %#8.8x, weak_bind_size = %u, " % ( 1385 self.weak_bind_off, self.weak_bind_size) 1386 s += "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % ( 1387 self.lazy_bind_off, self.lazy_bind_size) 1388 s += "export_off = %#8.8x, export_size = %u, " % ( 1389 self.export_off, self.export_size) 1390 return s 1391 1392 class 
DYLDSymtabLoadCommand(LoadCommand): 1393 1394 def __init__(self, lc): 1395 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1396 self.ilocalsym = 0 1397 self.nlocalsym = 0 1398 self.iextdefsym = 0 1399 self.nextdefsym = 0 1400 self.iundefsym = 0 1401 self.nundefsym = 0 1402 self.tocoff = 0 1403 self.ntoc = 0 1404 self.modtaboff = 0 1405 self.nmodtab = 0 1406 self.extrefsymoff = 0 1407 self.nextrefsyms = 0 1408 self.indirectsymoff = 0 1409 self.nindirectsyms = 0 1410 self.extreloff = 0 1411 self.nextrel = 0 1412 self.locreloff = 0 1413 self.nlocrel = 0 1414 1415 def unpack(self, mach_file, data): 1416 byte_order_char = mach_file.magic.get_byte_order() 1417 self.ilocalsym, self.nlocalsym, self.iextdefsym, self.nextdefsym, self.iundefsym, self.nundefsym, self.tocoff, self.ntoc, self.modtaboff, self.nmodtab, self.extrefsymoff, self.nextrefsyms, self.indirectsymoff, self.nindirectsyms, self.extreloff, self.nextrel, self.locreloff, self.nlocrel = data.get_n_uint32( 1418 18) 1419 1420 def __str__(self): 1421 s = Mach.LoadCommand.__str__(self) 1422 # s += "ilocalsym = %u, nlocalsym = %u, " % (self.ilocalsym, self.nlocalsym) 1423 # s += "iextdefsym = %u, nextdefsym = %u, " % (self.iextdefsym, self.nextdefsym) 1424 # s += "iundefsym %u, nundefsym = %u, " % (self.iundefsym, self.nundefsym) 1425 # s += "tocoff = %#8.8x, ntoc = %u, " % (self.tocoff, self.ntoc) 1426 # s += "modtaboff = %#8.8x, nmodtab = %u, " % (self.modtaboff, self.nmodtab) 1427 # s += "extrefsymoff = %#8.8x, nextrefsyms = %u, " % (self.extrefsymoff, self.nextrefsyms) 1428 # s += "indirectsymoff = %#8.8x, nindirectsyms = %u, " % (self.indirectsymoff, self.nindirectsyms) 1429 # s += "extreloff = %#8.8x, nextrel = %u, " % (self.extreloff, self.nextrel) 1430 # s += "locreloff = %#8.8x, nlocrel = %u" % (self.locreloff, 1431 # self.nlocrel) 1432 s += "ilocalsym = %-10u, nlocalsym = %u\n" % ( 1433 self.ilocalsym, self.nlocalsym) 1434 s += " iextdefsym = %-10u, nextdefsym = %u\n" % ( 1435 
self.iextdefsym, self.nextdefsym) 1436 s += " iundefsym = %-10u, nundefsym = %u\n" % ( 1437 self.iundefsym, self.nundefsym) 1438 s += " tocoff = %#8.8x, ntoc = %u\n" % ( 1439 self.tocoff, self.ntoc) 1440 s += " modtaboff = %#8.8x, nmodtab = %u\n" % ( 1441 self.modtaboff, self.nmodtab) 1442 s += " extrefsymoff = %#8.8x, nextrefsyms = %u\n" % ( 1443 self.extrefsymoff, self.nextrefsyms) 1444 s += " indirectsymoff = %#8.8x, nindirectsyms = %u\n" % ( 1445 self.indirectsymoff, self.nindirectsyms) 1446 s += " extreloff = %#8.8x, nextrel = %u\n" % ( 1447 self.extreloff, self.nextrel) 1448 s += " locreloff = %#8.8x, nlocrel = %u" % ( 1449 self.locreloff, self.nlocrel) 1450 return s 1451 1452 class SymtabLoadCommand(LoadCommand): 1453 1454 def __init__(self, lc): 1455 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1456 self.symoff = 0 1457 self.nsyms = 0 1458 self.stroff = 0 1459 self.strsize = 0 1460 1461 def unpack(self, mach_file, data): 1462 byte_order_char = mach_file.magic.get_byte_order() 1463 self.symoff, self.nsyms, self.stroff, self.strsize = data.get_n_uint32( 1464 4) 1465 1466 def __str__(self): 1467 s = Mach.LoadCommand.__str__(self) 1468 s += "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % ( 1469 self.symoff, self.nsyms, self.stroff, self.strsize) 1470 return s 1471 1472 class UUIDLoadCommand(LoadCommand): 1473 1474 def __init__(self, lc): 1475 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1476 self.uuid = None 1477 1478 def unpack(self, mach_file, data): 1479 uuid_data = data.get_n_uint8(16) 1480 uuid_str = '' 1481 for byte in uuid_data: 1482 uuid_str += '%2.2x' % byte 1483 self.uuid = uuid.UUID(uuid_str) 1484 mach_file.uuid = self.uuid 1485 1486 def __str__(self): 1487 s = Mach.LoadCommand.__str__(self) 1488 s += self.uuid.__str__() 1489 return s 1490 1491 class DataBlobLoadCommand(LoadCommand): 1492 1493 def __init__(self, lc): 1494 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 
1495 self.dataoff = 0 1496 self.datasize = 0 1497 1498 def unpack(self, mach_file, data): 1499 byte_order_char = mach_file.magic.get_byte_order() 1500 self.dataoff, self.datasize = data.get_n_uint32(2) 1501 1502 def __str__(self): 1503 s = Mach.LoadCommand.__str__(self) 1504 s += "dataoff = %#8.8x, datasize = %u" % ( 1505 self.dataoff, self.datasize) 1506 return s 1507 1508 class EncryptionInfoLoadCommand(LoadCommand): 1509 1510 def __init__(self, lc): 1511 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1512 self.cryptoff = 0 1513 self.cryptsize = 0 1514 self.cryptid = 0 1515 1516 def unpack(self, mach_file, data): 1517 byte_order_char = mach_file.magic.get_byte_order() 1518 self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3) 1519 1520 def __str__(self): 1521 s = Mach.LoadCommand.__str__(self) 1522 s += "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % ( 1523 self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid) 1524 return s 1525 1526 class SegmentLoadCommand(LoadCommand): 1527 1528 def __init__(self, lc): 1529 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1530 self.segname = None 1531 self.vmaddr = 0 1532 self.vmsize = 0 1533 self.fileoff = 0 1534 self.filesize = 0 1535 self.maxprot = 0 1536 self.initprot = 0 1537 self.nsects = 0 1538 self.flags = 0 1539 1540 def unpack(self, mach_file, data): 1541 is_64 = self.command.get_enum_value() == LC_SEGMENT_64 1542 self.segname = data.get_fixed_length_c_string(16, '', True) 1543 if is_64: 1544 self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint64( 1545 4) 1546 else: 1547 self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint32( 1548 4) 1549 self.maxprot, self.initprot, self.nsects, self.flags = data.get_n_uint32( 1550 4) 1551 mach_file.segments.append(self) 1552 for i in range(self.nsects): 1553 section = Mach.Section() 1554 section.unpack(is_64, data) 1555 section.index = 
len(mach_file.sections) 1556 mach_file.sections.append(section) 1557 1558 def __str__(self): 1559 s = Mach.LoadCommand.__str__(self) 1560 if self.command.get_enum_value() == LC_SEGMENT: 1561 s += "%#8.8x %#8.8x %#8.8x %#8.8x " % ( 1562 self.vmaddr, self.vmsize, self.fileoff, self.filesize) 1563 else: 1564 s += "%#16.16x %#16.16x %#16.16x %#16.16x " % ( 1565 self.vmaddr, self.vmsize, self.fileoff, self.filesize) 1566 s += "%s %s %3u %#8.8x" % (vm_prot_names[self.maxprot], vm_prot_names[ 1567 self.initprot], self.nsects, self.flags) 1568 s += ' ' + self.segname 1569 return s 1570 1571 class NList: 1572 1573 class Type: 1574 1575 class Stab(dict_utils.Enum): 1576 enum = { 1577 'N_GSYM': N_GSYM, 1578 'N_FNAME': N_FNAME, 1579 'N_FUN': N_FUN, 1580 'N_STSYM': N_STSYM, 1581 'N_LCSYM': N_LCSYM, 1582 'N_BNSYM': N_BNSYM, 1583 'N_OPT': N_OPT, 1584 'N_RSYM': N_RSYM, 1585 'N_SLINE': N_SLINE, 1586 'N_ENSYM': N_ENSYM, 1587 'N_SSYM': N_SSYM, 1588 'N_SO': N_SO, 1589 'N_OSO': N_OSO, 1590 'N_LSYM': N_LSYM, 1591 'N_BINCL': N_BINCL, 1592 'N_SOL': N_SOL, 1593 'N_PARAMS': N_PARAMS, 1594 'N_VERSION': N_VERSION, 1595 'N_OLEVEL': N_OLEVEL, 1596 'N_PSYM': N_PSYM, 1597 'N_EINCL': N_EINCL, 1598 'N_ENTRY': N_ENTRY, 1599 'N_LBRAC': N_LBRAC, 1600 'N_EXCL': N_EXCL, 1601 'N_RBRAC': N_RBRAC, 1602 'N_BCOMM': N_BCOMM, 1603 'N_ECOMM': N_ECOMM, 1604 'N_ECOML': N_ECOML, 1605 'N_LENG': N_LENG 1606 } 1607 1608 def __init__(self, magic=0): 1609 dict_utils.Enum.__init__(self, magic, self.enum) 1610 1611 def __init__(self, t=0): 1612 self.value = t 1613 1614 def __str__(self): 1615 n_type = self.value 1616 if n_type & N_STAB: 1617 stab = Mach.NList.Type.Stab(self.value) 1618 return '%s' % stab 1619 else: 1620 type = self.value & N_TYPE 1621 type_str = '' 1622 if type == N_UNDF: 1623 type_str = 'N_UNDF' 1624 elif type == N_ABS: 1625 type_str = 'N_ABS ' 1626 elif type == N_SECT: 1627 type_str = 'N_SECT' 1628 elif type == N_PBUD: 1629 type_str = 'N_PBUD' 1630 elif type == N_INDR: 1631 type_str = 'N_INDR' 1632 
else: 1633 type_str = "??? (%#2.2x)" % type 1634 if n_type & N_PEXT: 1635 type_str += ' | PEXT' 1636 if n_type & N_EXT: 1637 type_str += ' | EXT ' 1638 return type_str 1639 1640 def __init__(self): 1641 self.index = 0 1642 self.name_offset = 0 1643 self.name = 0 1644 self.type = Mach.NList.Type() 1645 self.sect_idx = 0 1646 self.desc = 0 1647 self.value = 0 1648 1649 def unpack(self, mach_file, data, symtab_lc): 1650 self.index = len(mach_file.symbols) 1651 self.name_offset = data.get_uint32() 1652 self.type.value, self.sect_idx = data.get_n_uint8(2) 1653 self.desc = data.get_uint16() 1654 if mach_file.is_64_bit(): 1655 self.value = data.get_uint64() 1656 else: 1657 self.value = data.get_uint32() 1658 data.push_offset_and_seek( 1659 mach_file.file_off + 1660 symtab_lc.stroff + 1661 self.name_offset) 1662 # print "get string for symbol[%u]" % self.index 1663 self.name = data.get_c_string() 1664 data.pop_offset_and_seek() 1665 1666 def __str__(self): 1667 name_display = '' 1668 if len(self.name): 1669 name_display = ' "%s"' % self.name 1670 return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % (self.name_offset, 1671 self.type.value, self.type, self.sect_idx, self.desc, self.value, name_display) 1672 1673 class Interactive(cmd.Cmd): 1674 '''Interactive command interpreter to mach-o files.''' 1675 1676 def __init__(self, mach, options): 1677 cmd.Cmd.__init__(self) 1678 self.intro = 'Interactive mach-o command interpreter' 1679 self.prompt = 'mach-o: %s %% ' % mach.path 1680 self.mach = mach 1681 self.options = options 1682 1683 def default(self, line): 1684 '''Catch all for unknown command, which will exit the interpreter.''' 1685 print("uknown command: %s" % line) 1686 return True 1687 1688 def do_q(self, line): 1689 '''Quit command''' 1690 return True 1691 1692 def do_quit(self, line): 1693 '''Quit command''' 1694 return True 1695 1696 def do_header(self, line): 1697 '''Dump mach-o file headers''' 1698 self.mach.dump_header(True, self.options) 1699 return False 
1700 1701 def do_load(self, line): 1702 '''Dump all mach-o load commands''' 1703 self.mach.dump_load_commands(True, self.options) 1704 return False 1705 1706 def do_sections(self, line): 1707 '''Dump all mach-o sections''' 1708 self.mach.dump_sections(True, self.options) 1709 return False 1710 1711 def do_symtab(self, line): 1712 '''Dump all mach-o symbols in the symbol table''' 1713 self.mach.dump_symtab(True, self.options) 1714 return False 1715 1716if __name__ == '__main__': 1717 parser = optparse.OptionParser( 1718 description='A script that parses skinny and universal mach-o files.') 1719 parser.add_option( 1720 '--arch', 1721 '-a', 1722 type='string', 1723 metavar='arch', 1724 dest='archs', 1725 action='append', 1726 help='specify one or more architectures by name') 1727 parser.add_option( 1728 '-v', 1729 '--verbose', 1730 action='store_true', 1731 dest='verbose', 1732 help='display verbose debug info', 1733 default=False) 1734 parser.add_option( 1735 '-H', 1736 '--header', 1737 action='store_true', 1738 dest='dump_header', 1739 help='dump the mach-o file header', 1740 default=False) 1741 parser.add_option( 1742 '-l', 1743 '--load-commands', 1744 action='store_true', 1745 dest='dump_load_commands', 1746 help='dump the mach-o load commands', 1747 default=False) 1748 parser.add_option( 1749 '-s', 1750 '--symtab', 1751 action='store_true', 1752 dest='dump_symtab', 1753 help='dump the mach-o symbol table', 1754 default=False) 1755 parser.add_option( 1756 '-S', 1757 '--sections', 1758 action='store_true', 1759 dest='dump_sections', 1760 help='dump the mach-o sections', 1761 default=False) 1762 parser.add_option( 1763 '--section', 1764 type='string', 1765 metavar='sectname', 1766 dest='section_names', 1767 action='append', 1768 help='Specify one or more section names to dump', 1769 default=[]) 1770 parser.add_option( 1771 '-o', 1772 '--out', 1773 type='string', 1774 dest='outfile', 1775 help='Used in conjunction with the --section=NAME option to save a single 
section\'s data to disk.', 1776 default=False) 1777 parser.add_option( 1778 '-i', 1779 '--interactive', 1780 action='store_true', 1781 dest='interactive', 1782 help='enable interactive mode', 1783 default=False) 1784 parser.add_option( 1785 '-m', 1786 '--mangled', 1787 action='store_true', 1788 dest='find_mangled', 1789 help='dump all mangled names in a mach file', 1790 default=False) 1791 parser.add_option( 1792 '-c', 1793 '--compare', 1794 action='store_true', 1795 dest='compare', 1796 help='compare two mach files', 1797 default=False) 1798 parser.add_option( 1799 '-M', 1800 '--extract-modules', 1801 action='store_true', 1802 dest='extract_modules', 1803 help='Extract modules from file', 1804 default=False) 1805 parser.add_option( 1806 '-C', 1807 '--count', 1808 type='int', 1809 dest='max_count', 1810 help='Sets the max byte count when dumping section data', 1811 default=-1) 1812 1813 (options, mach_files) = parser.parse_args() 1814 if options.extract_modules: 1815 if options.section_names: 1816 print("error: can't use --section option with the --extract-modules option") 1817 exit(1) 1818 if not options.outfile: 1819 print("error: the --output=FILE option must be specified with the --extract-modules option") 1820 exit(1) 1821 options.section_names.append("__apple_ast") 1822 if options.compare: 1823 if len(mach_files) == 2: 1824 mach_a = Mach() 1825 mach_b = Mach() 1826 mach_a.parse(mach_files[0]) 1827 mach_b.parse(mach_files[1]) 1828 mach_a.compare(mach_b) 1829 else: 1830 print('error: --compare takes two mach files as arguments') 1831 else: 1832 if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names): 1833 options.dump_header = True 1834 options.dump_load_commands = True 1835 if options.verbose: 1836 print('options', options) 1837 print('mach_files', mach_files) 1838 for path in mach_files: 1839 mach = Mach() 1840 mach.parse(path) 1841 if options.interactive: 1842 
interpreter = Mach.Interactive(mach, options) 1843 interpreter.cmdloop() 1844 else: 1845 mach.dump(options) 1846