1""" 2Utilities for reading and writing Mach-O headers 3""" 4from __future__ import print_function 5 6import sys 7import struct 8 9from macholib.mach_o import * 10from macholib.dyld import dyld_find, framework_info 11from macholib.util import fileview 12try: 13 from macholib.compat import bytes 14except ImportError: 15 pass 16 17try: 18 unicode 19except NameError: 20 unicode = str 21 22__all__ = ['MachO'] 23 24_RELOCATABLE = set(( 25 # relocatable commands that should be used for dependency walking 26 LC_LOAD_DYLIB, 27 LC_LOAD_WEAK_DYLIB, 28 LC_PREBOUND_DYLIB, 29 LC_REEXPORT_DYLIB, 30)) 31 32_RELOCATABLE_NAMES = { 33 LC_LOAD_DYLIB: 'load_dylib', 34 LC_LOAD_WEAK_DYLIB: 'load_weak_dylib', 35 LC_PREBOUND_DYLIB: 'prebound_dylib', 36 LC_REEXPORT_DYLIB: 'reexport_dylib', 37} 38 39def _shouldRelocateCommand(cmd): 40 """ 41 Should this command id be investigated for relocation? 42 """ 43 return cmd in _RELOCATABLE 44 45class MachO(object): 46 """ 47 Provides reading/writing the Mach-O header of a specific existing file 48 """ 49 # filename - the original filename of this mach-o 50 # sizediff - the current deviation from the initial mach-o size 51 # header - the mach-o header 52 # commands - a list of (load_command, somecommand, data) 53 # data is either a str, or a list of segment structures 54 # total_size - the current mach-o header size (including header) 55 # low_offset - essentially, the maximum mach-o header size 56 # id_cmd - the index of my id command, or None 57 58 59 def __init__(self, filename): 60 61 # supports the ObjectGraph protocol 62 self.graphident = filename 63 self.filename = filename 64 65 # initialized by load 66 self.fat = None 67 self.headers = [] 68 with open(filename, 'rb') as fp: 69 self.load(fp) 70 71 def __repr__(self): 72 return "<MachO filename=%r>" % (self.filename,) 73 74 def load(self, fh): 75 assert fh.tell() == 0 76 header = struct.unpack('>I', fh.read(4))[0] 77 fh.seek(0) 78 if header == FAT_MAGIC: 79 self.load_fat(fh) 80 else: 81 fh.seek(0, 2) 82 size = fh.tell() 83 fh.seek(0) 84 self.load_header(fh, 0, size) 85 86 def load_fat(self, fh): 87 self.fat = fat_header.from_fileobj(fh) 88 archs = [fat_arch.from_fileobj(fh) for i in range(self.fat.nfat_arch)] 89 for arch in archs: 90 self.load_header(fh, arch.offset, arch.size) 91 92 def rewriteLoadCommands(self, *args, **kw): 93 changed = False 94 for header in self.headers: 95 if header.rewriteLoadCommands(*args, **kw): 96 changed = True 97 return changed 98 99 def load_header(self, fh, offset, size): 100 fh.seek(offset) 101 header = struct.unpack('>I', fh.read(4))[0] 102 fh.seek(offset) 103 if header == MH_MAGIC: 104 magic, hdr, endian = MH_MAGIC, mach_header, '>' 105 elif header == MH_CIGAM: 106 magic, hdr, endian = MH_CIGAM, mach_header, '<' 107 elif header == MH_MAGIC_64: 108 magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>' 109 elif header == MH_CIGAM_64: 110 magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<' 111 else: 112 raise ValueError("Unknown Mach-O header: 0x%08x in %r" % ( 113 header, fh)) 114 hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian) 115 self.headers.append(hdr) 116 117 def write(self, f): 118 for header in self.headers: 119 header.write(f) 120 121class MachOHeader(object): 122 """ 123 Provides reading/writing the Mach-O header of a specific existing file 124 """ 125 # filename - the original filename of this mach-o 126 # sizediff - the current deviation from the initial mach-o size 127 # header - the mach-o header 128 # commands - a list of (load_command, somecommand, data) 129 # data is either a str, or a list of segment structures 130 # total_size - the current mach-o header size (including header) 131 # low_offset - essentially, the maximum mach-o header size 132 # id_cmd - the index of my id command, or None 133 134 135 def __init__(self, parent, fh, offset, size, magic, hdr, endian): 136 self.MH_MAGIC = magic 137 self.mach_header = hdr 138 139 # These are all initialized by self.load() 140 self.parent = parent 141 self.offset = offset 142 self.size = size 143 144 self.endian = endian 145 self.header = None 146 self.commands = None 147 self.id_cmd = None 148 self.sizediff = None 149 self.total_size = None 150 self.low_offset = None 151 self.filetype = None 152 self.headers = [] 153 154 self.load(fh) 155 156 def __repr__(self): 157 return "<%s filename=%r offset=%d size=%d endian=%r>" % ( 158 type(self).__name__, self.parent.filename, self.offset, self.size, 159 self.endian) 160 161 def load(self, fh): 162 fh = fileview(fh, self.offset, self.size) 163 fh.seek(0) 164 165 self.sizediff = 0 166 kw = {'_endian_': self.endian} 167 header = self.mach_header.from_fileobj(fh, **kw) 168 self.header = header 169 #if header.magic != self.MH_MAGIC: 170 # raise ValueError("header has magic %08x, expecting %08x" % ( 171 # header.magic, self.MH_MAGIC)) 172 173 cmd = self.commands = [] 174 175 self.filetype = self.get_filetype_shortname(header.filetype) 176 177 read_bytes = 0 178 low_offset = sys.maxsize 179 for i in range(header.ncmds): 180 # read the load command 181 cmd_load = load_command.from_fileobj(fh, **kw) 182 183 # read the specific command 184 klass = LC_REGISTRY.get(cmd_load.cmd, None) 185 if klass is None: 186 raise ValueError("Unknown load command: %d" % (cmd_load.cmd,)) 187 cmd_cmd = klass.from_fileobj(fh, **kw) 188 189 if cmd_load.cmd == LC_ID_DYLIB: 190 # remember where this command was 191 if self.id_cmd is not None: 192 raise ValueError("This dylib already has an id") 193 self.id_cmd = i 194 195 if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64): 196 # for segment commands, read the list of segments 197 segs = [] 198 # assert that the size makes sense 199 if cmd_load.cmd == LC_SEGMENT: 200 section_cls = section 201 else: # LC_SEGMENT_64 202 section_cls = section_64 203 204 expected_size = ( 205 sizeof(klass) + sizeof(load_command) + 206 (sizeof(section_cls) * cmd_cmd.nsects) 207 ) 208 if cmd_load.cmdsize != expected_size: 209 raise ValueError("Segment size mismatch") 210 # this is a zero block or something 211 # so the beginning is wherever the fileoff of this command is 212 if cmd_cmd.nsects == 0: 213 if cmd_cmd.filesize != 0: 214 low_offset = min(low_offset, cmd_cmd.fileoff) 215 else: 216 # this one has multiple segments 217 for j in range(cmd_cmd.nsects): 218 # read the segment 219 seg = section_cls.from_fileobj(fh, **kw) 220 # if the segment has a size and is not zero filled 221 # then its beginning is the offset of this segment 222 not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL) 223 if seg.offset > 0 and seg.size > 0 and not_zerofill: 224 low_offset = min(low_offset, seg.offset) 225 if not_zerofill: 226 c = fh.tell() 227 fh.seek(seg.offset) 228 sd = fh.read(seg.size) 229 seg.add_section_data(sd) 230 fh.seek(c) 231 segs.append(seg) 232 # data is a list of segments 233 cmd_data = segs 234 235 # XXX: Disabled for now because writing back doesn't work 236 #elif cmd_load.cmd == LC_CODE_SIGNATURE: 237 # c = fh.tell() 238 # fh.seek(cmd_cmd.dataoff) 239 # cmd_data = fh.read(cmd_cmd.datasize) 240 # fh.seek(c) 241 #elif cmd_load.cmd == LC_SYMTAB: 242 # c = fh.tell() 243 # fh.seek(cmd_cmd.stroff) 244 # cmd_data = fh.read(cmd_cmd.strsize) 245 # fh.seek(c) 246 247 else: 248 # data is a raw str 249 data_size = ( 250 cmd_load.cmdsize - sizeof(klass) - sizeof(load_command) 251 ) 252 cmd_data = fh.read(data_size) 253 cmd.append((cmd_load, cmd_cmd, cmd_data)) 254 read_bytes += cmd_load.cmdsize 255 256 # make sure the header made sense 257 if read_bytes != header.sizeofcmds: 258 raise ValueError("Read %d bytes, header reports %d bytes" % ( 259 read_bytes, header.sizeofcmds)) 260 self.total_size = sizeof(self.mach_header) + read_bytes 261 self.low_offset = low_offset 262 263 # this header overwrites a segment, what the heck? 264 if self.total_size > low_offset: 265 raise ValueError("total_size > low_offset (%d > %d)" % ( 266 self.total_size, low_offset)) 267 268 def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand): 269 """ 270 for all relocatable commands 271 yield (command_index, command_name, filename) 272 """ 273 for (idx, (lc, cmd, data)) in enumerate(self.commands): 274 if shouldRelocateCommand(lc.cmd): 275 name = _RELOCATABLE_NAMES[lc.cmd] 276 ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__) 277 yield idx, name, data[ofs:data.find(b'\x00', ofs)].decode( 278 sys.getfilesystemencoding()) 279 280 def rewriteInstallNameCommand(self, loadcmd): 281 """Rewrite the load command of this dylib""" 282 if self.id_cmd is not None: 283 self.rewriteDataForCommand(self.id_cmd, loadcmd) 284 return True 285 return False 286 287 def changedHeaderSizeBy(self, bytes): 288 self.sizediff += bytes 289 if (self.total_size + self.sizediff) > self.low_offset: 290 print("WARNING: Mach-O header in %r may be too large to relocate"%(self.parent.filename,)) 291 292 def rewriteLoadCommands(self, changefunc): 293 """ 294 Rewrite the load commands based upon a change dictionary 295 """ 296 data = changefunc(self.parent.filename) 297 changed = False 298 if data is not None: 299 if self.rewriteInstallNameCommand( 300 data.encode(sys.getfilesystemencoding())): 301 changed = True 302 for idx, name, filename in self.walkRelocatables(): 303 data = changefunc(filename) 304 if data is not None: 305 if self.rewriteDataForCommand(idx, data.encode( 306 sys.getfilesystemencoding())): 307 changed = True 308 return changed 309 310 def rewriteDataForCommand(self, idx, data): 311 lc, cmd, old_data = self.commands[idx] 312 hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__) 313 align = struct.calcsize('L') 314 data = data + (b'\x00' * (align - (len(data) % align))) 315 newsize = hdrsize + len(data) 316 self.commands[idx] = (lc, cmd, data) 317 self.changedHeaderSizeBy(newsize - lc.cmdsize) 318 lc.cmdsize, cmd.name = newsize, hdrsize 319 return True 320 321 def synchronize_size(self): 322 if (self.total_size + self.sizediff) > self.low_offset: 323 raise ValueError("New Mach-O header is too large to relocate in %r"%(self.parent.filename,)) 324 self.header.sizeofcmds += self.sizediff 325 self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds 326 self.sizediff = 0 327 328 def write(self, fileobj): 329 fileobj = fileview(fileobj, self.offset, self.size) 330 fileobj.seek(0) 331 332 # serialize all the mach-o commands 333 self.synchronize_size() 334 335 self.header.to_fileobj(fileobj) 336 for lc, cmd, data in self.commands: 337 lc.to_fileobj(fileobj) 338 cmd.to_fileobj(fileobj) 339 340 if sys.version_info[0] == 2: 341 if isinstance(data, unicode): 342 fileobj.write(data.encode(sys.getfilesystemencoding())) 343 344 elif isinstance(data, (bytes, str)): 345 fileobj.write(data) 346 else: 347 # segments.. 348 for obj in data: 349 obj.to_fileobj(fileobj) 350 else: 351 if isinstance(data, str): 352 fileobj.write(data.encode(sys.getfilesystemencoding())) 353 354 elif isinstance(data, bytes): 355 fileobj.write(data) 356 357 else: 358 # segments.. 359 for obj in data: 360 obj.to_fileobj(fileobj) 361 362 # zero out the unused space, doubt this is strictly necessary 363 # and is generally probably already the case 364 fileobj.write(b'\x00' * (self.low_offset - fileobj.tell())) 365 366 def getSymbolTableCommand(self): 367 for lc, cmd, data in self.commands: 368 if lc.cmd == LC_SYMTAB: 369 return cmd 370 return None 371 372 def getDynamicSymbolTableCommand(self): 373 for lc, cmd, data in self.commands: 374 if lc.cmd == LC_DYSYMTAB: 375 return cmd 376 return None 377 378 def get_filetype_shortname(self, filetype): 379 if filetype in MH_FILETYPE_SHORTNAMES: 380 return MH_FILETYPE_SHORTNAMES[filetype] 381 else: 382 return 'unknown' 383 384def main(fn): 385 m = MachO(fn) 386 seen = set() 387 for header in m.headers: 388 for idx, name, other in header.walkRelocatables(): 389 if other not in seen: 390 seen.add(other) 391 print('\t' + name + ": " + other) 392 393if __name__ == '__main__': 394 import sys 395 files = sys.argv[1:] or ['/bin/ls'] 396 for fn in files: 397 print(fn) 398 main(fn) 399