"""
Utilities for reading and writing Mach-O headers
"""
from __future__ import print_function

import sys
import struct
import os

from .mach_o import MH_FILETYPE_SHORTNAMES, LC_DYSYMTAB, LC_SYMTAB
from .mach_o import load_command, S_ZEROFILL, section_64, section
from .mach_o import LC_REGISTRY, LC_ID_DYLIB, LC_SEGMENT, fat_header
from .mach_o import LC_SEGMENT_64, MH_CIGAM_64, MH_MAGIC_64, FAT_MAGIC
from .mach_o import mach_header, fat_arch64, FAT_MAGIC_64, fat_arch
from .mach_o import LC_REEXPORT_DYLIB, LC_PREBOUND_DYLIB, LC_LOAD_WEAK_DYLIB
from .mach_o import LC_LOAD_UPWARD_DYLIB, LC_LOAD_DYLIB, mach_header_64
from .mach_o import MH_CIGAM, MH_MAGIC
from .ptypes import sizeof

from macholib.util import fileview
try:
    from macholib.compat import bytes
except ImportError:
    pass

try:
    unicode
except NameError:
    unicode = str

if sys.version_info[0] == 2:
    range = xrange  # noqa: F821

__all__ = ['MachO']

_RELOCATABLE = set((
    # relocatable commands that should be used for dependency walking
    LC_LOAD_DYLIB,
    LC_LOAD_UPWARD_DYLIB,
    LC_LOAD_WEAK_DYLIB,
    LC_PREBOUND_DYLIB,
    LC_REEXPORT_DYLIB,
))

_RELOCATABLE_NAMES = {
    LC_LOAD_DYLIB: 'load_dylib',
    LC_LOAD_UPWARD_DYLIB: 'load_upward_dylib',
    LC_LOAD_WEAK_DYLIB: 'load_weak_dylib',
    LC_PREBOUND_DYLIB: 'prebound_dylib',
    LC_REEXPORT_DYLIB: 'reexport_dylib',
}


def _shouldRelocateCommand(cmd):
    """
    Should this command id be investigated for relocation?
    """
    return cmd in _RELOCATABLE


def lc_str_value(offset, cmd_info):
    """
    Fetch the actual value of a field of type "lc_str"

    ``offset`` is relative to the start of the load command, while the
    payload in ``cmd_info`` starts after the two fixed-size structures, so
    their sizes are subtracted before slicing. Leading and trailing NUL
    bytes are stripped from the result.
    """
    cmd_load, cmd_cmd, cmd_data = cmd_info

    offset -= sizeof(cmd_load) + sizeof(cmd_cmd)
    return cmd_data[offset:].strip(b'\x00')


class MachO(object):
    """
    Provides reading/writing the Mach-O header(s) of a specific existing file

    A fat file yields one MachOHeader per architecture slice; any other
    file yields a single MachOHeader covering the whole file.
    """
    #   graphident  - identifier for the ObjectGraph protocol (the filename)
    #   filename    - the original filename of this mach-o
    #   loader_path - the directory part of filename
    #   fat         - the fat_header of the file, or None if it is not fat
    #   headers     - a list of MachOHeader objects, in file order

    def __init__(self, filename):

        # supports the ObjectGraph protocol
        self.graphident = filename
        self.filename = filename
        self.loader_path = os.path.dirname(filename)

        # initialized by load
        self.fat = None
        self.headers = []
        with open(filename, 'rb') as fp:
            self.load(fp)

    def __repr__(self):
        return "<MachO filename=%r>" % (self.filename,)

    def load(self, fh):
        """
        Read every Mach-O header from ``fh``, which must be positioned at
        the start of the file.
        """
        assert fh.tell() == 0
        # The fat magic is always stored big-endian.
        header = struct.unpack('>I', fh.read(4))[0]
        fh.seek(0)
        if header in (FAT_MAGIC, FAT_MAGIC_64):
            self.load_fat(fh)
        else:
            # Thin file: the single header spans the entire file.
            fh.seek(0, 2)
            size = fh.tell()
            fh.seek(0)
            self.load_header(fh, 0, size)

    def load_fat(self, fh):
        """
        Read the fat header and then the Mach-O header of every slice.

        Raises ValueError when the fat magic is not recognised.
        """
        self.fat = fat_header.from_fileobj(fh)
        if self.fat.magic == FAT_MAGIC:
            archs = [fat_arch.from_fileobj(fh)
                     for _ in range(self.fat.nfat_arch)]
        elif self.fat.magic == FAT_MAGIC_64:
            archs = [fat_arch64.from_fileobj(fh)
                     for _ in range(self.fat.nfat_arch)]
        else:
            raise ValueError(
                "Unknown fat header magic: %r" % (self.fat.magic,))

        for arch in archs:
            self.load_header(fh, arch.offset, arch.size)

    def rewriteLoadCommands(self, *args, **kw):
        """
        Forward to MachOHeader.rewriteLoadCommands for every header.

        Returns True when at least one header reported a change.
        """
        changed = False
        for header in self.headers:
            if header.rewriteLoadCommands(*args, **kw):
                changed = True
        return changed

    def load_header(self, fh, offset, size):
        """
        Parse the Mach-O header found at ``offset`` (spanning ``size``
        bytes) and append it to ``self.headers``.

        Raises ValueError when the magic number is not a known Mach-O magic.
        """
        fh.seek(offset)
        header = struct.unpack('>I', fh.read(4))[0]
        fh.seek(offset)
        # The magic is read big-endian; seeing the byte-swapped (CIGAM)
        # value therefore means the structures are little-endian.
        if header == MH_MAGIC:
            magic, hdr, endian = MH_MAGIC, mach_header, '>'
        elif header == MH_CIGAM:
            magic, hdr, endian = MH_CIGAM, mach_header, '<'
        elif header == MH_MAGIC_64:
            magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>'
        elif header == MH_CIGAM_64:
            magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<'
        else:
            raise ValueError("Unknown Mach-O header: 0x%08x in %r" % (
                header, fh))
        hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian)
        self.headers.append(hdr)

    def write(self, f):
        """Write every header back to the file object ``f``."""
        for header in self.headers:
            header.write(f)


class MachOHeader(object):
    """
    Provides reading/writing one Mach-O header (a thin file, or a single
    slice of a fat file) of a specific existing file
    """
    #   parent      - the MachO object this header belongs to
    #   offset      - offset of this header within the file
    #   size        - number of bytes of the file covered by this header
    #   endian      - struct byte-order character ('<' or '>')
    #   sizediff    - the current deviation from the initial mach-o size
    #   header      - the mach-o header
    #   commands    - a list of (load_command, somecommand, data)
    #                 data is either a str, or a list of segment structures
    #   total_size  - the current mach-o header size (including header)
    #   low_offset  - essentially, the maximum mach-o header size
    #   id_cmd      - the index of my id command, or None
    #   filetype    - short name of the header's file type

    def __init__(self, parent, fh, offset, size, magic, hdr, endian):
        self.MH_MAGIC = magic
        self.mach_header = hdr

        self.parent = parent
        self.offset = offset
        self.size = size
        self.endian = endian

        # These are all initialized by self.load()
        self.header = None
        self.commands = None
        self.id_cmd = None
        self.sizediff = None
        self.total_size = None
        self.low_offset = None
        self.filetype = None
        self.headers = []

        self.load(fh)

    def __repr__(self):
        return "<%s filename=%r offset=%d size=%d endian=%r>" % (
            type(self).__name__, self.parent.filename, self.offset, self.size,
            self.endian)

    def load(self, fh):
        """
        Parse the header and all of its load commands from ``fh``.

        Raises ValueError for an unknown load command, a duplicate
        LC_ID_DYLIB, a segment command whose size does not match its section
        count, or when the commands do not add up to ``sizeofcmds``.
        """
        # All offsets below are relative to the start of this slice.
        fh = fileview(fh, self.offset, self.size)
        fh.seek(0)

        self.sizediff = 0
        kw = {'_endian_': self.endian}
        header = self.mach_header.from_fileobj(fh, **kw)
        self.header = header
        # if header.magic != self.MH_MAGIC:
        #    raise ValueError("header has magic %08x, expecting %08x" % (
        #        header.magic, self.MH_MAGIC))

        cmd = self.commands = []

        self.filetype = self.get_filetype_shortname(header.filetype)

        read_bytes = 0
        # Lowest file offset used by any file-backed segment or section;
        # stays at sys.maxsize when there is none.
        low_offset = sys.maxsize
        for i in range(header.ncmds):
            # read the load command
            cmd_load = load_command.from_fileobj(fh, **kw)

            # read the specific command
            klass = LC_REGISTRY.get(cmd_load.cmd, None)
            if klass is None:
                raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
            cmd_cmd = klass.from_fileobj(fh, **kw)

            if cmd_load.cmd == LC_ID_DYLIB:
                # remember where this command was
                if self.id_cmd is not None:
                    raise ValueError("This dylib already has an id")
                self.id_cmd = i

            if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                # for segment commands, read the list of segments
                segs = []
                # assert that the size makes sense
                if cmd_load.cmd == LC_SEGMENT:
                    section_cls = section
                else:  # LC_SEGMENT_64
                    section_cls = section_64

                expected_size = (
                    sizeof(klass) + sizeof(load_command) +
                    (sizeof(section_cls) * cmd_cmd.nsects)
                )
                if cmd_load.cmdsize != expected_size:
                    raise ValueError("Segment size mismatch")
                # this is a zero block or something
                # so the beginning is wherever the fileoff of this command is
                if cmd_cmd.nsects == 0:
                    if cmd_cmd.filesize != 0:
                        low_offset = min(low_offset, cmd_cmd.fileoff)
                else:
                    # this one has multiple segments
                    for _ in range(cmd_cmd.nsects):
                        # read the segment
                        seg = section_cls.from_fileobj(fh, **kw)
                        # if the segment has a size and is not zero filled
                        # then its beginning is the offset of this segment
                        not_zerofill = (
                            (seg.flags & S_ZEROFILL) != S_ZEROFILL)
                        if seg.offset > 0 and seg.size > 0 and not_zerofill:
                            low_offset = min(low_offset, seg.offset)
                        if not_zerofill:
                            # fetch the section contents, then return to
                            # the load command stream
                            c = fh.tell()
                            fh.seek(seg.offset)
                            sd = fh.read(seg.size)
                            seg.add_section_data(sd)
                            fh.seek(c)
                        segs.append(seg)
                # data is a list of segments
                cmd_data = segs

            # XXX: Disabled for now because writing back doesn't work
            # elif cmd_load.cmd == LC_CODE_SIGNATURE:
            #    c = fh.tell()
            #    fh.seek(cmd_cmd.dataoff)
            #    cmd_data = fh.read(cmd_cmd.datasize)
            #    fh.seek(c)
            # elif cmd_load.cmd == LC_SYMTAB:
            #    c = fh.tell()
            #    fh.seek(cmd_cmd.stroff)
            #    cmd_data = fh.read(cmd_cmd.strsize)
            #    fh.seek(c)

            else:
                # data is a raw str
                data_size = (
                    cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                )
                cmd_data = fh.read(data_size)
            cmd.append((cmd_load, cmd_cmd, cmd_data))
            read_bytes += cmd_load.cmdsize

        # make sure the header made sense
        if read_bytes != header.sizeofcmds:
            raise ValueError("Read %d bytes, header reports %d bytes" % (
                read_bytes, header.sizeofcmds))
        self.total_size = sizeof(self.mach_header) + read_bytes
        self.low_offset = low_offset

    def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand):
        """
        for all relocatable commands
        yield (command_index, command_name, filename)
        """
        for (idx, (lc, cmd, data)) in enumerate(self.commands):
            if shouldRelocateCommand(lc.cmd):
                name = _RELOCATABLE_NAMES[lc.cmd]
                ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__)
                end = data.find(b'\x00', ofs)
                if end == -1:
                    # No NUL terminator: the name runs to the end of the
                    # payload. Slicing with -1 would drop its last byte.
                    end = len(data)
                yield idx, name, data[ofs:end].decode(
                    sys.getfilesystemencoding())

    def rewriteInstallNameCommand(self, loadcmd):
        """
        Rewrite the load command of this dylib

        Returns True when the header has an id command (which is then
        replaced by ``loadcmd``), False otherwise.
        """
        if self.id_cmd is not None:
            self.rewriteDataForCommand(self.id_cmd, loadcmd)
            return True
        return False

    def changedHeaderSizeBy(self, bytes):
        """
        Record that the load commands grew by ``bytes`` (may be negative)
        and print a warning when they no longer fit below ``low_offset``.
        """
        self.sizediff += bytes
        if (self.total_size + self.sizediff) > self.low_offset:
            print(
                "WARNING: Mach-O header in %r may be too large to relocate" % (
                    self.parent.filename,))

    def rewriteLoadCommands(self, changefunc):
        """
        Rewrite the load commands based upon a change dictionary

        ``changefunc`` is called with a filename and returns either the
        replacement name or None to leave that entry untouched. It is
        applied first to the file's own name (rewriting the id command, if
        any) and then to every relocatable command. Returns True when
        anything was rewritten.
        """
        data = changefunc(self.parent.filename)
        changed = False
        if data is not None:
            if self.rewriteInstallNameCommand(
                    data.encode(sys.getfilesystemencoding())):
                changed = True
        for idx, _name, filename in self.walkRelocatables():
            data = changefunc(filename)
            if data is not None:
                if self.rewriteDataForCommand(idx, data.encode(
                        sys.getfilesystemencoding())):
                    changed = True
        return changed

    def rewriteDataForCommand(self, idx, data):
        """
        Replace the payload of command ``idx`` with ``data`` (bytes).

        The payload is NUL-padded up to a multiple of 8 bytes; at least one
        NUL is always appended, so the stored name is terminated. The
        command size and name offset are updated to match. Always returns
        True.
        """
        lc, cmd, _old_data = self.commands[idx]
        hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__)
        align = struct.calcsize('Q')
        data = data + (b'\x00' * (align - (len(data) % align)))
        newsize = hdrsize + len(data)
        self.commands[idx] = (lc, cmd, data)
        self.changedHeaderSizeBy(newsize - lc.cmdsize)
        lc.cmdsize, cmd.name = newsize, hdrsize
        return True

    def synchronize_size(self):
        """
        Fold the pending size change into the header.

        Raises ValueError when the resized load commands would extend past
        ``low_offset``.
        """
        if (self.total_size + self.sizediff) > self.low_offset:
            raise ValueError(
                ("New Mach-O header is too large to relocate in %r "
                 "(new size=%r, max size=%r, delta=%r)") % (
                    self.parent.filename, self.total_size + self.sizediff,
                    self.low_offset, self.sizediff))
        self.header.sizeofcmds += self.sizediff
        self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds
        self.sizediff = 0

    def write(self, fileobj):
        """
        Serialize the header and its load commands into ``fileobj`` at this
        header's offset, then zero the gap up to ``low_offset``.

        Raises ValueError (via synchronize_size) when the commands no longer
        fit below ``low_offset``.
        """
        fileobj = fileview(fileobj, self.offset, self.size)
        fileobj.seek(0)

        # serialize all the mach-o commands
        self.synchronize_size()

        self.header.to_fileobj(fileobj)
        for lc, cmd, data in self.commands:
            lc.to_fileobj(fileobj)
            cmd.to_fileobj(fileobj)

            # ``unicode`` is ``str`` on Python 3, so this one chain covers
            # both major versions: text is encoded, byte strings are
            # written as they are.
            if isinstance(data, unicode):
                fileobj.write(data.encode(sys.getfilesystemencoding()))

            elif isinstance(data, (bytes, str)):
                fileobj.write(data)

            else:
                # segments..
                for obj in data:
                    obj.to_fileobj(fileobj)

        # zero out the unused space, doubt this is strictly necessary
        # and is generally probably already the case
        #
        # low_offset is still sys.maxsize when load() found no file-backed
        # segment or section; there is no gap to fill in that case, and
        # building a padding string of that length cannot succeed.
        if self.low_offset != sys.maxsize:
            fileobj.write(b'\x00' * (self.low_offset - fileobj.tell()))

    def getSymbolTableCommand(self):
        """Return the LC_SYMTAB command structure, or None if absent."""
        for lc, cmd, _data in self.commands:
            if lc.cmd == LC_SYMTAB:
                return cmd
        return None

    def getDynamicSymbolTableCommand(self):
        """Return the LC_DYSYMTAB command structure, or None if absent."""
        for lc, cmd, _data in self.commands:
            if lc.cmd == LC_DYSYMTAB:
                return cmd
        return None

    def get_filetype_shortname(self, filetype):
        """
        Return the short name for ``filetype``, or 'unknown' when it is not
        listed in MH_FILETYPE_SHORTNAMES.
        """
        if filetype in MH_FILETYPE_SHORTNAMES:
            return MH_FILETYPE_SHORTNAMES[filetype]
        else:
            return 'unknown'


def main(fn):
    """
    Print each distinct relocatable dependency of the Mach-O file ``fn``,
    one per line, prefixed by the kind of load command that names it.
    """
    m = MachO(fn)
    seen = set()
    for header in m.headers:
        for _idx, name, other in header.walkRelocatables():
            if other not in seen:
                seen.add(other)
                print('\t' + name + ": " + other)


if __name__ == '__main__':
    files = sys.argv[1:] or ['/bin/ls']
    for fn in files:
        print(fn)
        main(fn)