1"""
2Utilities for reading and writing Mach-O headers
3"""
4from __future__ import print_function
5
6import sys
7import struct
8import os
9
10from .mach_o import MH_FILETYPE_SHORTNAMES, LC_DYSYMTAB, LC_SYMTAB
11from .mach_o import load_command, S_ZEROFILL, section_64, section
12from .mach_o import LC_REGISTRY, LC_ID_DYLIB, LC_SEGMENT, fat_header
13from .mach_o import LC_SEGMENT_64, MH_CIGAM_64, MH_MAGIC_64, FAT_MAGIC
14from .mach_o import mach_header, fat_arch64, FAT_MAGIC_64, fat_arch
15from .mach_o import LC_REEXPORT_DYLIB, LC_PREBOUND_DYLIB, LC_LOAD_WEAK_DYLIB
16from .mach_o import LC_LOAD_UPWARD_DYLIB, LC_LOAD_DYLIB, mach_header_64
17from .mach_o import MH_CIGAM, MH_MAGIC
18from .ptypes import sizeof
19
20from macholib.util import fileview
21try:
22    from macholib.compat import bytes
23except ImportError:
24    pass
25
try:
    unicode
except NameError:
    # Python 3 has no separate ``unicode`` type; text is ``str``
    unicode = str

if sys.version_info[0] == 2:
    # Python 2: alias ``range`` to ``xrange`` for this module
    range = xrange  # noqa: F821

__all__ = ['MachO']

# Load commands that reference another dynamic library, mapped to the
# short name that is reported when walking the relocatable commands.
_RELOCATABLE_NAMES = {
    LC_LOAD_DYLIB: 'load_dylib',
    LC_LOAD_UPWARD_DYLIB: 'load_upward_dylib',
    LC_LOAD_WEAK_DYLIB: 'load_weak_dylib',
    LC_PREBOUND_DYLIB: 'prebound_dylib',
    LC_REEXPORT_DYLIB: 'reexport_dylib',
}

# relocatable commands that should be used for dependency walking;
# derived from the name table so that the two can never drift apart
_RELOCATABLE = set(_RELOCATABLE_NAMES)
52
53
def _shouldRelocateCommand(cmd):
    """
    Should this command id be investigated for relocation?

    Returns True when ``cmd`` is one of the load command ids collected
    in ``_RELOCATABLE``, False otherwise.
    """
    return cmd in _RELOCATABLE
59
60
def lc_str_value(offset, cmd_info):
    """
    Fetch the actual value of a field of type "lc_str"

    ``offset`` is the value of the lc_str field and ``cmd_info`` is a
    ``(load_command, command, data)`` triple.  Returns the bytes of
    ``data`` from the rebased offset onwards, with NUL bytes stripped
    from both ends.
    """
    cmd_load, cmd_cmd, cmd_data = cmd_info

    # cmd_data does not include the two leading structures, so their
    # sizes are subtracted before indexing into it
    # (presumably the offset counts from the start of the load command)
    offset -= sizeof(cmd_load) + sizeof(cmd_cmd)
    return cmd_data[offset:].strip(b'\x00')
69
70
class MachO(object):
    """
    Provides reading/writing the Mach-O header(s) of a specific existing file
    """
    #   graphident  - identifier used for the ObjectGraph protocol
    #                 (the filename)
    #   filename    - the original filename of this mach-o
    #   loader_path - the directory part of filename
    #   fat         - the fat_header when the file starts with a fat
    #                 magic, otherwise None
    #   headers     - a list of MachOHeader, one per loaded header

    def __init__(self, filename):
        """Open ``filename`` in binary mode and load all of its headers."""

        # supports the ObjectGraph protocol
        self.graphident = filename
        self.filename = filename
        self.loader_path = os.path.dirname(filename)

        # initialized by load
        self.fat = None
        self.headers = []
        with open(filename, 'rb') as fp:
            self.load(fp)

    def __repr__(self):
        return "<MachO filename=%r>" % (self.filename,)

    def load(self, fh):
        """
        Load every header found in the file object ``fh``

        ``fh`` must be positioned at offset 0.  The first four bytes are
        read as a big-endian integer to decide between a fat file and a
        single header spanning the whole file.
        """
        assert fh.tell() == 0
        header = struct.unpack('>I', fh.read(4))[0]
        fh.seek(0)
        if header in (FAT_MAGIC, FAT_MAGIC_64):
            self.load_fat(fh)
        else:
            # a thin file: the single header covers the entire file
            fh.seek(0, os.SEEK_END)
            size = fh.tell()
            fh.seek(0)
            self.load_header(fh, 0, size)

    def load_fat(self, fh):
        """
        Read the fat header and load the header of every listed architecture

        Raises ValueError when the fat header carries an unknown magic.
        """
        self.fat = fat_header.from_fileobj(fh)
        if self.fat.magic == FAT_MAGIC:
            arch_cls = fat_arch
        elif self.fat.magic == FAT_MAGIC_64:
            arch_cls = fat_arch64
        else:
            raise ValueError(
                "Unknown fat header magic: %r" % (self.fat.magic,))

        # the architecture records directly follow the fat header
        archs = [arch_cls.from_fileobj(fh)
                 for i in range(self.fat.nfat_arch)]

        for arch in archs:
            self.load_header(fh, arch.offset, arch.size)

    def rewriteLoadCommands(self, *args, **kw):
        """
        Forward to rewriteLoadCommands of every header

        Returns True when at least one header reported a change.  Every
        header is visited, even after the first change was reported.
        """
        changed = False
        for header in self.headers:
            if header.rewriteLoadCommands(*args, **kw):
                changed = True
        return changed

    def load_header(self, fh, offset, size):
        """
        Load the Mach-O header located at ``offset`` and append it to
        ``self.headers``

        The magic at ``offset`` is read as a big-endian integer and
        selects the header structure (32 or 64 bit) and the byte order
        handed to MachOHeader.  Raises ValueError for an unknown magic.
        """
        fh.seek(offset)
        header = struct.unpack('>I', fh.read(4))[0]
        fh.seek(offset)
        if header == MH_MAGIC:
            magic, hdr, endian = MH_MAGIC, mach_header, '>'
        elif header == MH_CIGAM:
            magic, hdr, endian = MH_CIGAM, mach_header, '<'
        elif header == MH_MAGIC_64:
            magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>'
        elif header == MH_CIGAM_64:
            magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<'
        else:
            raise ValueError("Unknown Mach-O header: 0x%08x in %r" % (
                header, fh))
        hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian)
        self.headers.append(hdr)

    def write(self, f):
        """Write every header back to the file object ``f``."""
        for header in self.headers:
            header.write(f)
154
155
class MachOHeader(object):
    """
    Provides reading/writing the Mach-O header of a specific existing file

    One instance covers the region of the parent's file that starts at
    ``offset`` and is ``size`` bytes long.
    """
    #   parent      - the object owning this header (its ``filename``
    #                 attribute is used in messages)
    #   offset      - start of this header's region in the file
    #   size        - size of that region
    #   endian      - byte order passed to the structure readers
    #   MH_MAGIC    - the magic this header was created for
    #   mach_header - the header structure class
    #   sizediff    - the current deviation from the initial mach-o size
    #   header      - the mach-o header
    #   commands    - a list of (load_command, somecommand, data)
    #                 data is either a str, or a list of segment structures
    #   total_size  - the current mach-o header size (including header)
    #   low_offset  - essentially, the maximum mach-o header size
    #   id_cmd      - the index of my id command, or None
    #   filetype    - short name of the header's filetype, or 'unknown'

    def __init__(self, parent, fh, offset, size, magic, hdr, endian):
        self.MH_MAGIC = magic
        self.mach_header = hdr

        self.parent = parent
        self.offset = offset
        self.size = size
        self.endian = endian

        # These are all initialized by self.load()
        self.header = None
        self.commands = None
        self.id_cmd = None
        self.sizediff = None
        self.total_size = None
        self.low_offset = None
        self.filetype = None
        self.headers = []

        self.load(fh)

    def __repr__(self):
        return "<%s filename=%r offset=%d size=%d endian=%r>" % (
            type(self).__name__, self.parent.filename, self.offset, self.size,
            self.endian)

    def load(self, fh):
        """
        Read the header and all of its load commands from ``fh``

        Raises ValueError for an unknown load command, a second
        LC_ID_DYLIB command, a segment whose size does not match its
        section count, a command smaller than its fixed structures, or
        when the bytes read disagree with ``header.sizeofcmds``.
        """
        fh = fileview(fh, self.offset, self.size)
        fh.seek(0)

        self.sizediff = 0
        kw = {'_endian_': self.endian}
        header = self.mach_header.from_fileobj(fh, **kw)
        self.header = header
        # NOTE: header.magic is not validated against self.MH_MAGIC here

        cmd = self.commands = []

        self.filetype = self.get_filetype_shortname(header.filetype)

        read_bytes = 0
        low_offset = sys.maxsize
        for i in range(header.ncmds):
            # read the load command
            cmd_load = load_command.from_fileobj(fh, **kw)

            # read the specific command
            klass = LC_REGISTRY.get(cmd_load.cmd, None)
            if klass is None:
                raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
            cmd_cmd = klass.from_fileobj(fh, **kw)

            if cmd_load.cmd == LC_ID_DYLIB:
                # remember where this command was
                if self.id_cmd is not None:
                    raise ValueError("This dylib already has an id")
                self.id_cmd = i

            if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                # for segment commands, read the list of segments
                segs = []
                # assert that the size makes sense
                if cmd_load.cmd == LC_SEGMENT:
                    section_cls = section
                else:  # LC_SEGMENT_64
                    section_cls = section_64

                expected_size = (
                    sizeof(klass) + sizeof(load_command) +
                    (sizeof(section_cls) * cmd_cmd.nsects)
                )
                if cmd_load.cmdsize != expected_size:
                    raise ValueError("Segment size mismatch")
                # this is a zero block or something
                # so the beginning is wherever the fileoff of this command is
                if cmd_cmd.nsects == 0:
                    if cmd_cmd.filesize != 0:
                        low_offset = min(low_offset, cmd_cmd.fileoff)
                else:
                    # this one has multiple segments
                    for _ in range(cmd_cmd.nsects):
                        # read the segment
                        seg = section_cls.from_fileobj(fh, **kw)
                        # if the segment has a size and is not zero filled
                        # then its beginning is the offset of this segment
                        not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL)
                        if seg.offset > 0 and seg.size > 0 and not_zerofill:
                            low_offset = min(low_offset, seg.offset)
                        if not_zerofill:
                            # fetch the section contents, then return to
                            # the load command stream
                            c = fh.tell()
                            fh.seek(seg.offset)
                            sd = fh.read(seg.size)
                            seg.add_section_data(sd)
                            fh.seek(c)
                        segs.append(seg)
                # data is a list of segments
                cmd_data = segs

            # XXX: the payloads of LC_CODE_SIGNATURE and LC_SYMTAB are
            # deliberately not loaded because writing back doesn't work

            else:
                # data is a raw str
                data_size = (
                    cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                )
                if data_size < 0:
                    # never hand a negative size to read()
                    raise ValueError(
                        "Load command %d has invalid size %d" % (
                            cmd_load.cmd, cmd_load.cmdsize))
                cmd_data = fh.read(data_size)
            cmd.append((cmd_load, cmd_cmd, cmd_data))
            read_bytes += cmd_load.cmdsize

        # make sure the header made sense
        if read_bytes != header.sizeofcmds:
            raise ValueError("Read %d bytes, header reports %d bytes" % (
                read_bytes, header.sizeofcmds))
        self.total_size = sizeof(self.mach_header) + read_bytes
        self.low_offset = low_offset

    def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand):
        """
        for all relocatable commands
        yield (command_index, command_name, filename)
        """
        for (idx, (lc, cmd, data)) in enumerate(self.commands):
            if shouldRelocateCommand(lc.cmd):
                name = _RELOCATABLE_NAMES[lc.cmd]
                ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__)
                end = data.find(b'\x00', ofs)
                if end == -1:
                    # no terminator: use the rest of the data instead of
                    # letting the -1 cut off the last byte
                    end = len(data)
                yield idx, name, data[ofs:end].decode(
                        sys.getfilesystemencoding())

    def rewriteInstallNameCommand(self, loadcmd):
        """Rewrite the load command of this dylib"""
        if self.id_cmd is not None:
            self.rewriteDataForCommand(self.id_cmd, loadcmd)
            return True
        return False

    def changedHeaderSizeBy(self, bytes):
        """
        Record that the header changed size by ``bytes`` and print a
        warning when it no longer fits below ``low_offset``
        """
        self.sizediff += bytes
        if (self.total_size + self.sizediff) > self.low_offset:
            print(
                "WARNING: Mach-O header in %r may be too large to relocate" % (
                    self.parent.filename,))

    def rewriteLoadCommands(self, changefunc):
        """
        Rewrite the load commands based upon a change dictionary

        ``changefunc`` is called with the parent's filename and with each
        name yielded by walkRelocatables; a non-None result replaces the
        corresponding name.  Returns True when anything was rewritten.
        """
        data = changefunc(self.parent.filename)
        changed = False
        if data is not None:
            if self.rewriteInstallNameCommand(
                    data.encode(sys.getfilesystemencoding())):
                changed = True
        for idx, name, filename in self.walkRelocatables():
            data = changefunc(filename)
            if data is not None:
                if self.rewriteDataForCommand(idx, data.encode(
                        sys.getfilesystemencoding())):
                    changed = True
        return changed

    def rewriteDataForCommand(self, idx, data):
        """
        Replace the data of command ``idx`` with ``data`` (bytes) and
        update the command size and name offset accordingly
        """
        lc, cmd, old_data = self.commands[idx]
        hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__)
        align = struct.calcsize('Q')
        # pads to a multiple of ``align``; always appends at least one
        # NUL byte, even when the length is already aligned
        data = data + (b'\x00' * (align - (len(data) % align)))
        newsize = hdrsize + len(data)
        self.commands[idx] = (lc, cmd, data)
        self.changedHeaderSizeBy(newsize - lc.cmdsize)
        lc.cmdsize, cmd.name = newsize, hdrsize
        return True

    def synchronize_size(self):
        """
        Fold ``sizediff`` into the header's ``sizeofcmds``

        Raises ValueError when the resized header would extend past
        ``low_offset``.
        """
        if (self.total_size + self.sizediff) > self.low_offset:
            raise ValueError(
                ("New Mach-O header is too large to relocate in %r "
                 "(new size=%r, max size=%r, delta=%r)") % (
                    self.parent.filename, self.total_size + self.sizediff,
                    self.low_offset, self.sizediff))
        self.header.sizeofcmds += self.sizediff
        self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds
        self.sizediff = 0

    def write(self, fileobj):
        """
        Write the header and all load commands to ``fileobj``, then zero
        the space up to ``low_offset``
        """
        fileobj = fileview(fileobj, self.offset, self.size)
        fileobj.seek(0)

        # serialize all the mach-o commands
        self.synchronize_size()

        self.header.to_fileobj(fileobj)
        for lc, cmd, data in self.commands:
            lc.to_fileobj(fileobj)
            cmd.to_fileobj(fileobj)

            # ``unicode`` is ``str`` on Python 3, so text is matched
            # first and the second test only sees byte strings
            if isinstance(data, unicode):
                fileobj.write(data.encode(sys.getfilesystemencoding()))
            elif isinstance(data, (bytes, str)):
                fileobj.write(data)
            else:
                # segments..
                for obj in data:
                    obj.to_fileobj(fileobj)

        # zero out the unused space, doubt this is strictly necessary
        # and is generally probably already the case
        # (low_offset stays at sys.maxsize when load() saw no file data;
        # there is nothing sensible to pad up to in that case)
        if self.low_offset != sys.maxsize:
            fileobj.write(b'\x00' * (self.low_offset - fileobj.tell()))

    def getSymbolTableCommand(self):
        """Return the first LC_SYMTAB command, or None."""
        for lc, cmd, data in self.commands:
            if lc.cmd == LC_SYMTAB:
                return cmd
        return None

    def getDynamicSymbolTableCommand(self):
        """Return the first LC_DYSYMTAB command, or None."""
        for lc, cmd, data in self.commands:
            if lc.cmd == LC_DYSYMTAB:
                return cmd
        return None

    def get_filetype_shortname(self, filetype):
        """Return the short name for ``filetype``, or 'unknown'."""
        return MH_FILETYPE_SHORTNAMES.get(filetype, 'unknown')
418
419
def main(fn):
    """
    Print every distinct relocatable dependency of the Mach-O file ``fn``

    Each dependency is printed once, as a tab-indented
    ``command_name: filename`` line, even when several headers of the
    file refer to it.
    """
    m = MachO(fn)
    seen = set()
    for header in m.headers:
        for idx, name, other in header.walkRelocatables():
            if other not in seen:
                seen.add(other)
                print('\t' + name + ": " + other)
428
429
if __name__ == '__main__':
    # with no arguments on the command line, inspect /bin/ls
    files = sys.argv[1:] or ['/bin/ls']
    for fn in files:
        print(fn)
        main(fn)
436