1"""
2Utilities for reading and writing Mach-O headers
3"""
4from __future__ import print_function
5
6import sys
7import struct
8
9from macholib.mach_o import *
10from macholib.dyld import dyld_find, framework_info
11from macholib.util import fileview
12try:
13    from macholib.compat import bytes
14except ImportError:
15    pass
16
17try:
18    unicode
19except NameError:
20    unicode = str
21
22__all__ = ['MachO']
23
# Short display names of the dylib load commands that are followed when
# walking dependencies, keyed by load command id.
_RELOCATABLE_NAMES = {
    LC_LOAD_DYLIB: 'load_dylib',
    LC_LOAD_WEAK_DYLIB: 'load_weak_dylib',
    LC_PREBOUND_DYLIB: 'prebound_dylib',
    LC_REEXPORT_DYLIB: 'reexport_dylib',
}

# relocatable commands that should be used for dependency walking
# (exactly the command ids that have a name in the table above)
_RELOCATABLE = set(_RELOCATABLE_NAMES)
38
def _shouldRelocateCommand(cmd):
    """
    Tell whether the load command id *cmd* is one of the commands in
    _RELOCATABLE, i.e. one that should be investigated for relocation.
    """
    is_candidate = cmd in _RELOCATABLE
    return is_candidate
44
45class MachO(object):
46    """
47    Provides reading/writing the Mach-O header of a specific existing file
48    """
49    #   filename   - the original filename of this mach-o
50    #   sizediff   - the current deviation from the initial mach-o size
51    #   header     - the mach-o header
52    #   commands   - a list of (load_command, somecommand, data)
53    #                data is either a str, or a list of segment structures
54    #   total_size - the current mach-o header size (including header)
55    #   low_offset - essentially, the maximum mach-o header size
56    #   id_cmd     - the index of my id command, or None
57
58
59    def __init__(self, filename):
60
61        # supports the ObjectGraph protocol
62        self.graphident = filename
63        self.filename = filename
64
65        # initialized by load
66        self.fat = None
67        self.headers = []
68        with open(filename, 'rb') as fp:
69            self.load(fp)
70
71    def __repr__(self):
72        return "<MachO filename=%r>" % (self.filename,)
73
74    def load(self, fh):
75        assert fh.tell() == 0
76        header = struct.unpack('>I', fh.read(4))[0]
77        fh.seek(0)
78        if header == FAT_MAGIC:
79            self.load_fat(fh)
80        else:
81            fh.seek(0, 2)
82            size = fh.tell()
83            fh.seek(0)
84            self.load_header(fh, 0, size)
85
86    def load_fat(self, fh):
87        self.fat = fat_header.from_fileobj(fh)
88        archs = [fat_arch.from_fileobj(fh) for i in range(self.fat.nfat_arch)]
89        for arch in archs:
90            self.load_header(fh, arch.offset, arch.size)
91
92    def rewriteLoadCommands(self, *args, **kw):
93        changed = False
94        for header in self.headers:
95            if header.rewriteLoadCommands(*args, **kw):
96                changed = True
97        return changed
98
99    def load_header(self, fh, offset, size):
100        fh.seek(offset)
101        header = struct.unpack('>I', fh.read(4))[0]
102        fh.seek(offset)
103        if header == MH_MAGIC:
104            magic, hdr, endian = MH_MAGIC, mach_header, '>'
105        elif header == MH_CIGAM:
106            magic, hdr, endian = MH_CIGAM, mach_header, '<'
107        elif header == MH_MAGIC_64:
108            magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>'
109        elif header == MH_CIGAM_64:
110            magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<'
111        else:
112            raise ValueError("Unknown Mach-O header: 0x%08x in %r" % (
113                header, fh))
114        hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian)
115        self.headers.append(hdr)
116
117    def write(self, f):
118        for header in self.headers:
119            header.write(f)
120
class MachOHeader(object):
    """
    Provides reading/writing the Mach-O header of one slice (one
    architecture) of a specific existing file
    """
    #   parent      - the object that created this header; its ``filename``
    #                 is used in repr and messages
    #   offset      - where this slice starts in the file
    #   size        - the size of this slice
    #   endian      - byte order passed to the structure readers
    #   MH_MAGIC    - the magic value given to the constructor
    #   mach_header - the structure class used to read the header
    #   header      - the mach-o header
    #   commands    - a list of (load_command, somecommand, data)
    #                 data is either a byte string, or a list of section
    #                 structures
    #   id_cmd      - the index of my id command, or None
    #   sizediff    - the current deviation from the initial mach-o size
    #   total_size  - the current mach-o header size (including header)
    #   low_offset  - essentially, the maximum mach-o header size
    #   filetype    - short name for header.filetype, or 'unknown'

    def __init__(self, parent, fh, offset, size, magic, hdr, endian):
        self.MH_MAGIC = magic
        self.mach_header = hdr

        self.parent = parent
        self.offset = offset
        self.size = size
        self.endian = endian

        # These are all initialized by self.load()
        self.header = None
        self.commands = None
        self.id_cmd = None
        self.sizediff = None
        self.total_size = None
        self.low_offset = None
        self.filetype = None

        # not used anywhere in this class; kept so the attribute stays
        # available to existing callers
        self.headers = []

        self.load(fh)

    def __repr__(self):
        return "<%s filename=%r offset=%d size=%d endian=%r>" % (
            type(self).__name__, self.parent.filename, self.offset, self.size,
            self.endian)

    def load(self, fh):
        """
        Read the mach header and all of its load commands from the slice
        of *fh* described by self.offset and self.size.

        Raises ValueError when a load command is unknown, when a second
        LC_ID_DYLIB is found, when a segment command's size does not match
        its section count, when the commands do not add up to the size the
        header reports, or when the header would overlap the first file
        content referenced by a segment.
        """
        fh = fileview(fh, self.offset, self.size)
        fh.seek(0)

        self.sizediff = 0
        kw = {'_endian_': self.endian}
        header = self.mach_header.from_fileobj(fh, **kw)
        self.header = header
        # NOTE: header.magic is not checked against self.MH_MAGIC here.

        commands = self.commands = []

        self.filetype = self.get_filetype_shortname(header.filetype)

        read_bytes = 0
        # lowest file offset at which segment/section content starts;
        # stays at sys.maxsize when no command provides one
        low_offset = sys.maxsize
        for i in range(header.ncmds):
            # read the load command
            cmd_load = load_command.from_fileobj(fh, **kw)

            # read the specific command
            klass = LC_REGISTRY.get(cmd_load.cmd, None)
            if klass is None:
                raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
            cmd_cmd = klass.from_fileobj(fh, **kw)

            if cmd_load.cmd == LC_ID_DYLIB:
                # remember where this command was
                if self.id_cmd is not None:
                    raise ValueError("This dylib already has an id")
                self.id_cmd = i

            if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                # for segment commands, read the list of sections
                sections = []
                if cmd_load.cmd == LC_SEGMENT:
                    section_cls = section
                else:  # LC_SEGMENT_64
                    section_cls = section_64

                # assert that the size makes sense
                expected_size = (
                    sizeof(klass) + sizeof(load_command) +
                    (sizeof(section_cls) * cmd_cmd.nsects)
                )
                if cmd_load.cmdsize != expected_size:
                    raise ValueError("Segment size mismatch")

                if cmd_cmd.nsects == 0:
                    # this is a zero block or something
                    # so the beginning is wherever the fileoff of this
                    # command is
                    if cmd_cmd.filesize != 0:
                        low_offset = min(low_offset, cmd_cmd.fileoff)
                else:
                    # this segment has one or more sections
                    for j in range(cmd_cmd.nsects):
                        # read the section structure
                        seg = section_cls.from_fileobj(fh, **kw)
                        # if the section has a size and is not zero filled
                        # then its beginning is the offset of this section
                        not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL)
                        if seg.offset > 0 and seg.size > 0 and not_zerofill:
                            low_offset = min(low_offset, seg.offset)
                        if not_zerofill:
                            # fetch the section's content, then return to
                            # the load commands
                            resume_at = fh.tell()
                            fh.seek(seg.offset)
                            section_data = fh.read(seg.size)
                            seg.add_section_data(section_data)
                            fh.seek(resume_at)
                        sections.append(seg)
                # data is a list of sections
                cmd_data = sections

            # XXX: reading the payloads of LC_CODE_SIGNATURE (dataoff /
            # datasize) and LC_SYMTAB (stroff / strsize) is disabled for
            # now because writing back doesn't work; they fall through to
            # the generic branch below.

            else:
                # data is a raw byte string: whatever follows the fixed
                # part of the command
                data_size = (
                    cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                )
                cmd_data = fh.read(data_size)
            commands.append((cmd_load, cmd_cmd, cmd_data))
            read_bytes += cmd_load.cmdsize

        # make sure the header made sense
        if read_bytes != header.sizeofcmds:
            raise ValueError("Read %d bytes, header reports %d bytes" % (
                read_bytes, header.sizeofcmds))
        self.total_size = sizeof(self.mach_header) + read_bytes
        self.low_offset = low_offset

        # this header overwrites a segment, what the heck?
        if self.total_size > low_offset:
            raise ValueError("total_size > low_offset (%d > %d)" % (
                self.total_size, low_offset))

    def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand):
        """
        for all relocatable commands
        yield (command_index, command_name, filename)

        *shouldRelocateCommand* is called with each load command id and
        selects the commands to report.  The command name is looked up in
        _RELOCATABLE_NAMES, so a predicate that accepts a command id
        missing from that table results in a KeyError.
        """
        for (idx, (lc, cmd, data)) in enumerate(self.commands):
            if shouldRelocateCommand(lc.cmd):
                name = _RELOCATABLE_NAMES[lc.cmd]
                # cmd.name counts from the start of the load command;
                # data starts after the two fixed-size structures
                ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__)
                end = data.find(b'\x00', ofs)
                if end == -1:
                    # no NUL terminator: the name runs to the end of the
                    # data (slicing with -1 would drop its last byte)
                    end = len(data)
                yield idx, name, data[ofs:end].decode(
                    sys.getfilesystemencoding())

    def rewriteInstallNameCommand(self, loadcmd):
        """
        Rewrite the id command of this dylib with the byte string
        *loadcmd*.

        Returns True when this header has an id command, False otherwise.
        """
        if self.id_cmd is not None:
            self.rewriteDataForCommand(self.id_cmd, loadcmd)
            return True
        return False

    def changedHeaderSizeBy(self, bytes):
        """
        Record that the load commands grew by *bytes* (may be negative) and
        print a warning when the header no longer fits below low_offset.
        """
        self.sizediff += bytes
        if (self.total_size + self.sizediff) > self.low_offset:
            print("WARNING: Mach-O header in %r may be too large to relocate"%(self.parent.filename,))

    def rewriteLoadCommands(self, changefunc):
        """
        Rewrite the load commands based upon a change function

        *changefunc* is called with the parent's filename (for the id
        command) and with the filename of every relocatable command; it
        returns the replacement name, or None to leave that one alone.

        Returns True when at least one command was rewritten.
        """
        changed = False
        new_id = changefunc(self.parent.filename)
        if new_id is not None:
            if self.rewriteInstallNameCommand(
                    new_id.encode(sys.getfilesystemencoding())):
                changed = True
        for idx, name, filename in self.walkRelocatables():
            new_name = changefunc(filename)
            if new_name is not None:
                if self.rewriteDataForCommand(idx, new_name.encode(
                        sys.getfilesystemencoding())):
                    changed = True
        return changed

    def rewriteDataForCommand(self, idx, data):
        """
        Replace the data of command *idx* with the byte string *data*,
        NUL-padded, and update the command and header sizes accordingly.

        Always returns True.
        """
        lc, cmd, _ = self.commands[idx]
        hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__)
        # 'L' is the native C long, whose size depends on the platform
        # running this code (4 or 8 bytes), so the same rewrite used to
        # produce different files on different hosts.  'Q' keeps the
        # 8 byte alignment everywhere; 8 is also a multiple of the 4 byte
        # alignment used by 32-bit files.
        align = struct.calcsize('Q')
        # adds between 1 and align NULs, so the name is always terminated
        data = data + (b'\x00' * (align - (len(data) % align)))
        newsize = hdrsize + len(data)
        self.commands[idx] = (lc, cmd, data)
        self.changedHeaderSizeBy(newsize - lc.cmdsize)
        lc.cmdsize, cmd.name = newsize, hdrsize
        return True

    def synchronize_size(self):
        """
        Fold the pending size difference into header.sizeofcmds and
        total_size.

        Raises ValueError when the resized header does not fit below
        low_offset.
        """
        if (self.total_size + self.sizediff) > self.low_offset:
            raise ValueError("New Mach-O header is too large to relocate in %r"%(self.parent.filename,))
        self.header.sizeofcmds += self.sizediff
        self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds
        self.sizediff = 0

    def write(self, fileobj):
        """
        Write the header and all load commands to this header's slice of
        *fileobj*, then zero the space up to low_offset.

        Raises ValueError (from synchronize_size) when the header has grown
        past low_offset.
        """
        fileobj = fileview(fileobj, self.offset, self.size)
        fileobj.seek(0)

        # serialize all the mach-o commands
        self.synchronize_size()

        self.header.to_fileobj(fileobj)
        for lc, cmd, data in self.commands:
            lc.to_fileobj(fileobj)
            cmd.to_fileobj(fileobj)

            # One chain serves Python 2 and 3: ``unicode`` is the text
            # type on both (the module aliases it to str on Python 3).
            if isinstance(data, unicode):
                fileobj.write(data.encode(sys.getfilesystemencoding()))
            elif isinstance(data, (bytes, str)):
                fileobj.write(data)
            else:
                # segments..
                for obj in data:
                    obj.to_fileobj(fileobj)

        # zero out the unused space, doubt this is strictly necessary
        # and is generally probably already the case
        if self.low_offset != sys.maxsize:
            fileobj.write(b'\x00' * (self.low_offset - fileobj.tell()))
        # else: load() found no file content to bound the header, so there
        # is no known end of the unused space; padding up to sys.maxsize
        # would only exhaust memory.

    def getSymbolTableCommand(self):
        """Return the first LC_SYMTAB command, or None if there is none."""
        for lc, cmd, data in self.commands:
            if lc.cmd == LC_SYMTAB:
                return cmd
        return None

    def getDynamicSymbolTableCommand(self):
        """Return the first LC_DYSYMTAB command, or None if there is none."""
        for lc, cmd, data in self.commands:
            if lc.cmd == LC_DYSYMTAB:
                return cmd
        return None

    def get_filetype_shortname(self, filetype):
        """
        Return the short name registered for *filetype* in
        MH_FILETYPE_SHORTNAMES, or 'unknown'.
        """
        return MH_FILETYPE_SHORTNAMES.get(filetype, 'unknown')
383
def main(fn):
    """
    Print every distinct relocatable filename referenced by the Mach-O
    file *fn*, each on its own tab-indented line prefixed with the name of
    the load command it was first seen in.
    """
    already_printed = set()
    for header in MachO(fn).headers:
        for _idx, name, other in header.walkRelocatables():
            if other in already_printed:
                continue
            already_printed.add(other)
            print('\t' + name + ": " + other)
392
if __name__ == '__main__':
    # Script mode: report on each file named on the command line,
    # falling back to /bin/ls when none is given.
    import sys
    files = sys.argv[1:]
    if not files:
        files = ['/bin/ls']
    for fn in files:
        print(fn)
        main(fn)
399