1# -*-coding:utf8 -*-
2# This file is part of Mach-O Loader for CLE.
3# Contributed December 2016 by Fraunhofer SIT (https://www.sit.fraunhofer.de/en/) and updated in September 2019.
4
5import struct
6from typing import Callable, Dict, Tuple
7
8from .symbol import BindingSymbol
9
10from typing import TYPE_CHECKING
11
12if TYPE_CHECKING:
13    from ... import MachO
14
15from ...errors import CLEInvalidBinaryError
16from ...address_translator import AT
17
18import logging
19l = logging.getLogger(name=__name__)
20
# A raw binding opcode byte is split into the opcode proper (high nibble)
# and an immediate operand (low nibble).
OPCODE_MASK = 0xF0
IMM_MASK = 0x0F

# Binding types, i.e. the values the immediate of SET_TYPE_IMM can select.
BIND_TYPE_POINTER = 1
BIND_TYPE_TEXT_ABSOLUTE32 = 2
BIND_TYPE_TEXT_PCREL32 = 3

# Binding opcodes, as they appear after masking a raw byte with OPCODE_MASK.
BIND_OPCODE_DONE = 0x00
BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10
BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20
BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30
BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40
BIND_OPCODE_SET_TYPE_IMM = 0x50
BIND_OPCODE_SET_ADDEND_SLEB = 0x60
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70
BIND_OPCODE_ADD_ADDR_ULEB = 0x80
BIND_OPCODE_DO_BIND = 0x90
BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0
BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0
BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0
39
# Byte-to-int helper: where bytes and str are the same type, indexing a blob
# yields a one-character string that needs ord(); otherwise indexing already
# yields an int and the value is passed through unchanged.
chh = ord if bytes is str else (lambda x: x)
44
45
def read_uleb(blob: bytes, offset: int) -> Tuple[int, int]:
    """Reads a number encoded as uleb128

    :param blob: Buffer holding the encoded number; indexing it must yield ints
    :param offset: Index of the first encoded byte inside ``blob``
    :return: Tuple ``(value, length)`` where ``length`` is the number of bytes consumed.
             If the blob ends before a byte with a clear high bit is seen, the value
             decoded so far is returned together with the number of bytes that were read
    """
    result = 0
    shift = 0
    index = offset

    while index < len(blob):
        # Indexing bytes yields an int on Python 3, so no ord() shim is required
        byte = blob[index]
        # the low seven bits carry payload, least significant group first
        result |= (byte & 0x7f) << shift
        shift += 7
        index += 1
        if not byte & 0x80:
            # a clear high bit marks the last byte of the number
            break

    return result, index - offset
61
62
def read_sleb(blob: bytes, offset: int) -> Tuple[int, int]:
    """Reads a number encoded as sleb128

    :param blob: Buffer holding the encoded number; indexing it must yield ints
    :param offset: Index of the first encoded byte inside ``blob``
    :return: Tuple ``(value, length)`` where ``length`` is the number of bytes consumed.
             If the blob ends before a byte with a clear high bit is seen, the bits
             decoded so far are returned without sign extension
    """
    result = 0
    shift = 0
    index = offset

    while index < len(blob):
        # Indexing bytes yields an int on Python 3, so no ord() shim is required
        byte = blob[index]
        result |= (byte & 0x7f) << shift
        shift += 7
        index += 1
        if not byte & 0x80:
            if byte & 0x40:
                # bit 6 of the final byte is the sign bit: two's complement
                result -= (1 << shift)
            break

    return result, index - offset
81
82
class BindingState():
    """Mutable interpreter state that is threaded through the binding opcode handlers"""

    def __init__(self, is_64: bool) -> None:
        """
        :param is_64: True if the binary is a 64 bit binary; selects the pointer size
        """
        self.index = 0  # offset of the next byte to read from the opcode blob
        self.done = False  # set once the DONE opcode was seen
        self.lib_ord = 0  # library ordinal of the symbol to bind
        self.sym_name = ""  # name of the symbol to bind
        self.sym_flags = 0  # flags that were given together with the symbol name
        self.binding_type = 0  # 1 = pointer, 2 = absolute32, 3 = pcrel32
        self.addend = 0  # value added to the symbol address when binding
        self.segment_index = 0  # index into the segment list of the binary
        self.address = 0  # location that the next bind writes to
        self.seg_end_address = 0  # TODO: no rebasing support
        # address is expected to properly overflow and address is uintptr_t
        # (unsigned long according to _uintptr_t.h)
        # NOTE(review): fixed at 2 ** 64 regardless of is_64 - confirm this is intended for 32 bit binaries
        self.wraparound = 2 ** 64
        self.sizeof_intptr_t = 8 if is_64 else 4  # experimentally determined
        self.bind_handler = None  # function(state,binary) => None

    def add_address_ov(self, address, addend):
        """Sets ``self.address`` to ``address + addend`` with unsigned overflow semantics.

        This is a very ugly kludge. It is needed because dyld relies on overflow
        semantics and represents several negative offsets through BIG ulebs.

        :param address: Base address the addend is applied to
        :param addend: Offset to add; huge values encode negative offsets
        """
        # Reduce modulo the wraparound so that a sum of exactly 2 ** 64 also wraps to 0
        self.address = (address + addend) % self.wraparound

    def check_address_bounds(self):
        """Verifies that the current address lies below the end of the current segment.

        :raises CLEInvalidBinaryError: if ``address >= seg_end_address``
        """
        if self.address >= self.seg_end_address:
            l.error("index %d: address >= seg_end_address (%#x >= %#x)", self.index, self.address, self.seg_end_address)
            raise CLEInvalidBinaryError()
113
114
class BindingHelper():
    """Interprets binding opcode blobs on behalf of a MachO binary.

    The binding logic is factored out of MachO; this class is meant to be used
    together with a MachO instance and not on its own."""
    binary: 'MachO'

    def __init__(self, binary):
        self.binary = binary

    def do_normal_bind(self, blob: bytes):
        """Performs non-lazy, non-weak bindings

        :param blob: Blob containing binding opcodes; None means there is nothing to do"""

        if blob is None:
            return  # skip

        l.debug("Binding non-lazy, non-weak symbols")
        state = BindingState(self.binary.arch.bits == 64)
        first_segment = self.binary.segments[0]
        state.seg_end_address = first_segment.vaddr + first_segment.memsize
        state.bind_handler = default_binding_handler

        # opcode (high nibble of the raw byte) => handler
        handlers = {
            0: n_opcode_done,
            0x10: n_opcode_set_dylib_ordinal_imm,
            0x20: n_opcode_set_dylib_ordinal_uleb,
            0x30: n_opcode_set_dylib_special_imm,
            0x40: n_opcode_set_trailing_flags_imm,
            0x50: n_opcode_set_type_imm,
            0x60: n_opcode_set_addend_sleb,
            0x70: n_opcode_set_segment_and_offset_uleb,
            0x80: n_opcode_add_addr_uleb,
            0x90: n_opcode_do_bind,
            0xA0: n_opcode_do_bind_add_addr_uleb,
            0xB0: n_opcode_do_bind_add_addr_imm_scaled,
            0xC0: n_opcode_do_bind_uleb_times_skipping_uleb
        }
        self._do_bind_generic(blob, state, handlers)

        l.debug("Done binding non-lazy, non-weak symbols ")

    def do_lazy_bind(self, blob):
        """
        Performs lazy binding

        :param blob: Blob containing binding opcodes; None means there is nothing to do
        """
        if blob is None:
            return  # skip
        l.debug("Binding lazy symbols")

        state = BindingState(self.binary.arch.bits == 64)
        state.index = 0
        state.bind_handler = default_binding_handler
        blob_size = len(blob)

        # Only this subset of opcodes is accepted while binding lazily
        handlers = {
            0x00: n_opcode_done,
            0x10: n_opcode_set_dylib_ordinal_imm,
            0x20: n_opcode_set_dylib_ordinal_uleb,
            0x30: n_opcode_set_dylib_special_imm,
            0x40: n_opcode_set_trailing_flags_imm,
            0x50: n_opcode_set_type_imm,
            0x70: l_opcode_set_segment_and_offset_uleb,
            0x90: l_opcode_do_bind,
        }

        # Every lazy binding entry ends with BIND_OPCODE_DONE, so the interpreter
        # is restarted for each entry until the blob is exhausted
        while state.index < blob_size:
            # re-initialise state (except index)
            state.done = False
            state.lib_ord = 0
            state.sym_name = ""
            state.sym_flags = 0
            state.binding_type = 1
            state.addend = 0
            state.segment_index = 0
            state.address = 0
            state.seg_end_address = 0  # TODO: no rebasing support

            self._do_bind_generic(blob, state, handlers)

        l.debug("Done binding lazy symbols")

    def _do_bind_generic(self,
                         blob,
                         init_state: BindingState,
                         opcode_dict: Dict[int,
                                           Callable[[BindingState, 'MachO', int, bytes], BindingState]]
                         ):
        """
        Does the actual binding work. Represents a generic framework for interpreting binding opcodes

        Runs until a handler sets ``done`` on the state or the end of the blob is reached.

        :param blob: blob of binding opcodes
        :param init_state: Initial BindingState
        :param opcode_dict: Dictionary opcode=> handler
        :return: resulting binding state
        """
        state = init_state
        segment = self.binary.segments[state.segment_index]
        state.seg_end_address = segment.vaddr + segment.memsize  # TODO: no rebasing support
        blob_size = len(blob)
        while not state.done and state.index < blob_size:
            l.debug("Current address: %#x, blob index (offset): %#x", state.address, state.index)
            raw_opcode = blob[state.index]
            state.index += 1
            # high nibble selects the handler, low nibble is its immediate operand
            opcode = raw_opcode & OPCODE_MASK
            immediate = raw_opcode & IMM_MASK
            try:
                # NOTE: a KeyError raised inside the handler itself ends up here as well
                state = opcode_dict[opcode](state, self.binary, immediate, blob)
            except KeyError:
                l.error("Invalid opcode for current binding: %#x", opcode)

        return state
221
222
223# pylint: disable=unused-argument
# The functions below each implement the handling of one binding opcode (or a variant of it).
# They all share the signature: def X(state, binary, immediate, blob) => state
def n_opcode_done(s: BindingState, _b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """BIND_OPCODE_DONE: flags the state as done, which ends the interpreter loop.

    :param s: current binding state
    :return: the updated state
    """
    s.done = True
    l.debug("BIND_OPCODE_DONE @ %#x", s.index)
    return s
230
231
def n_opcode_set_dylib_ordinal_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_DYLIB_ORDINAL_IMM: takes the library ordinal directly from the immediate.

    :param s: current binding state
    :param i: immediate operand, used as library ordinal
    :return: the updated state
    """
    ordinal = i
    l.debug("SET_DYLIB_ORDINAL_IMM @ %#x: %d", s.index, ordinal)
    s.lib_ord = ordinal
    return s
236
237
def n_opcode_set_dylib_ordinal_uleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """SET_DYLIB_ORDINAL_ULEB: reads the library ordinal as uleb128 operand from the blob.

    :param s: current binding state
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past the operand
    """
    ordinal, consumed = read_uleb(blob, s.index)
    s.lib_ord = ordinal
    s.index += consumed
    # the offset that is logged here is the one AFTER the operand
    l.debug("SET_DYLIB_ORDINAL_ULEB @ %#x: %d", s.index, s.lib_ord)
    return s
244
245
def n_opcode_set_dylib_special_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_DYLIB_SPECIAL_IMM: sets a special (zero or negative) library ordinal.

    :param s: current binding state
    :param i: immediate operand; 0 stays 0, everything else is sign extended
    :return: the updated state
    """
    # Filling the high nibble with ones and subtracting 256 interprets the
    # immediate as a negative 8 bit number, i.e. 1..15 become -15..-1
    s.lib_ord = (i | OPCODE_MASK) - 256 if i != 0 else 0
    l.debug("SET_DYLIB_SPECIAL_IMM @ %#x: %d", s.index, s.lib_ord)
    return s
253
254
def n_opcode_set_trailing_flags_imm(s: BindingState, _b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SYMBOL_TRAILING_FLAGS_IMM: reads the zero terminated symbol name that follows the opcode.

    :param s: current binding state
    :param i: immediate operand, stored as symbol flags
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past the terminating zero byte
    :raises CLEInvalidBinaryError: if the name is not terminated before the end of the blob
    """
    name_start = s.index
    name_end = blob.find(b"\x00", name_start)
    if name_end < 0:
        # without this check a malformed blob would surface as a bare IndexError
        l.error("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: symbol name is not zero terminated", name_start)
        raise CLEInvalidBinaryError()

    s.sym_flags = i
    # latin-1 maps every byte to the code point of the same value, which is
    # exactly what calling chr() on each byte does
    s.sym_name = blob[name_start:name_end].decode("latin-1")
    s.index = name_end + 1  # move past 0 byte
    l.debug("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: %r,%#x", name_start, s.sym_name, s.sym_flags)
    return s
266
267
def n_opcode_set_type_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_TYPE_IMM: takes the binding type from the immediate.

    :param s: current binding state
    :param i: immediate operand, used as binding type
    :return: the updated state
    """
    # pylint: disable=unused-argument
    new_type = i
    s.binding_type = new_type
    l.debug("SET_TYPE_IMM @ %#x: %d", s.index, s.binding_type)
    return s
273
274
def n_opcode_set_addend_sleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """SET_ADDEND_SLEB: reads the addend as sleb128 operand from the blob.

    :param s: current binding state
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past the operand
    """
    addend, consumed = read_sleb(blob, s.index)
    s.addend = addend
    # log with the offset of the operand, i.e. before moving on
    l.debug("SET_ADDEND_SLEB @ %#x: %d", s.index, s.addend)
    s.index += consumed
    return s
281
282
def n_opcode_set_segment_and_offset_uleb(s: BindingState, b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SEGMENT_AND_OFFSET_ULEB (normal binding): selects a segment and an offset inside of it.

    :param s: current binding state
    :param b: binary whose segment list is indexed
    :param i: immediate operand, used as segment index
    :param blob: blob of binding opcodes; the offset follows as uleb128 operand
    :return: the updated state; address and segment end are set from the chosen segment
    """
    s.segment_index = i
    offset, consumed = read_uleb(blob, s.index)
    l.debug("(n)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, s.segment_index, offset)
    s.index += consumed

    segment = b.segments[s.segment_index]
    segment_start = segment.vaddr
    s.add_address_ov(segment_start, offset)
    # later bounds checks are performed against the end of this segment
    s.seg_end_address = segment.vaddr + segment.memsize

    return s
293
294
def l_opcode_set_segment_and_offset_uleb(s: BindingState, b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SEGMENT_AND_OFFSET_ULEB (lazy binding): sets the address from a segment and an offset.

    In contrast to the variant for normal binding, neither the segment index
    nor the segment end address of the state is touched.

    :param s: current binding state
    :param b: binary whose segment list is indexed
    :param i: immediate operand, used as segment index
    :param blob: blob of binding opcodes; the offset follows as uleb128 operand
    :return: the updated state, with the index moved past the operand
    """
    offset, consumed = read_uleb(blob, s.index)
    l.debug("(l)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, i, offset)
    segment = b.segments[i]
    s.add_address_ov(segment.vaddr, offset)
    s.index += consumed
    return s
302
303
def n_opcode_add_addr_uleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """ADD_ADDR_ULEB: advances the current address by a uleb128 operand.

    :param s: current binding state
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past the operand
    """
    delta, consumed = read_uleb(blob, s.index)
    s.add_address_ov(s.address, delta)
    l.debug("ADD_ADDR_ULEB @ %#x: %d", s.index, delta)
    s.index += consumed
    return s
310
311
def n_opcode_do_bind(s: BindingState, b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """DO_BIND (normal binding): binds at the current address and moves on by one pointer.

    :param s: current binding state
    :param b: binary that is handed to the bind handler
    :return: the updated state
    :raises CLEInvalidBinaryError: if the current address is outside of the segment
    """
    l.debug("(n)DO_BIND @ %#x", s.index)
    s.check_address_bounds()
    s.bind_handler(s, b)
    # prepare the address for the next bind
    pointer_size = s.sizeof_intptr_t
    s.add_address_ov(s.address, pointer_size)
    return s
318
319
def l_opcode_do_bind(s: BindingState, b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """DO_BIND (lazy binding): binds at the current address.

    No bounds check is performed and the address is not advanced.

    :param s: current binding state
    :param b: binary that is handed to the bind handler
    :return: the state
    """
    l.debug("(l)DO_BIND @ %#x", s.index)
    handler = s.bind_handler
    handler(s, b)
    return s
324
325
def n_opcode_do_bind_add_addr_uleb(s: BindingState, b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """DO_BIND_ADD_ADDR_ULEB: binds at the current address, then skips a uleb128 operand plus one pointer.

    :param s: current binding state
    :param b: binary that is handed to the bind handler
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past the operand
    :raises CLEInvalidBinaryError: if the current address is outside of the segment
    """
    extra, consumed = read_uleb(blob, s.index)
    l.debug("DO_BIND_ADD_ADDR_ULEB @ %#x: %d", s.index, extra)

    out_of_bounds = s.address >= s.seg_end_address
    if out_of_bounds:
        l.error("DO_BIND_ADD_ADDR_ULEB @ %#x: address >= seg_end_address (%#x>=%#x)",
                s.index, s.address, s.seg_end_address)
        raise CLEInvalidBinaryError()

    s.index += consumed
    s.bind_handler(s, b)
    # this is done AFTER binding in preparation for the NEXT step
    s.add_address_ov(s.address, extra + s.sizeof_intptr_t)
    return s
338
339
def n_opcode_do_bind_add_addr_imm_scaled(s: BindingState, b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """DO_BIND_ADD_ADDR_IMM_SCALED: binds at the current address, then skips ``i`` pointers plus one pointer.

    :param s: current binding state
    :param b: binary that is handed to the bind handler
    :param i: immediate operand, number of additional pointers to skip
    :return: the updated state
    :raises CLEInvalidBinaryError: if the current address is outside of the segment
    """
    l.debug("DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: %d", s.index, i)

    out_of_bounds = s.address >= s.seg_end_address
    if out_of_bounds:
        l.error("DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: address >= seg_end_address (%#x>=%#x)",
                s.index, s.address, s.seg_end_address)
        raise CLEInvalidBinaryError()

    s.bind_handler(s, b)
    # this is done AFTER binding in preparation for the NEXT step
    pointer_size = s.sizeof_intptr_t
    s.add_address_ov(s.address, (i + 1) * pointer_size)
    return s
350
351
def n_opcode_do_bind_uleb_times_skipping_uleb(s: BindingState, b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """DO_BIND_ULEB_TIMES_SKIPPING_ULEB: binds ``count`` times, skipping ``skip`` bytes plus one pointer each time.

    Both ``count`` and ``skip`` are read as uleb128 operands from the blob, in this order.

    :param s: current binding state
    :param b: binary that is handed to the bind handler
    :param blob: blob of binding opcodes
    :return: the updated state, with the index moved past both operands
    :raises CLEInvalidBinaryError: if the address leaves the segment before all binds are done
    """
    operands_offset = s.index  # offset of the first operand, used for log messages
    count, count_len = read_uleb(blob, s.index)
    s.index += count_len
    skip, skip_len = read_uleb(blob, s.index)
    s.index += skip_len
    l.debug("DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: %d,%d", operands_offset, count, skip)

    for _ in range(count):
        if s.address >= s.seg_end_address:
            # report the opcode that is actually being handled here
            l.error("DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: address >= seg_end_address (%#x >= %#x)",
                    operands_offset, s.address, s.seg_end_address)
            raise CLEInvalidBinaryError()
        s.bind_handler(s, b)
        # this is done AFTER binding in preparation for the NEXT bind
        s.add_address_ov(s.address, skip + s.sizeof_intptr_t)
    return s
367
368
369# default binding handler
def default_binding_handler(state: BindingState, binary: 'MachO'):
    """Binds location to the symbol with the given name and library ordinal

    The symbol is looked up by ``state.sym_name`` and ``state.lib_ord``. If it is not known yet
    a BindingSymbol is created and registered with the binary. The value is then written to
    ``state.address`` according to ``state.binding_type`` and the location is recorded in the
    ``bind_xrefs`` of the symbol.

    :param state: current binding state
    :param binary: binary whose symbols and memory are used
    :raises CLEInvalidBinaryError: if the symbol is ambiguous or the binding type is unknown
    """

    # locate the symbol:
    matches = binary.symbols.get_by_name_and_ordinal(state.sym_name, state.lib_ord)
    if len(matches) > 1:
        l.error("Cannot bind: More than one match for (%r,%d)", state.sym_name, state.lib_ord)
        raise CLEInvalidBinaryError()
    if len(matches) < 1:
        l.info("No match for (%r,%d), generating BindingSymbol ...", state.sym_name, state.lib_ord)
        matches = [BindingSymbol(binary, state.sym_name, state.lib_ord)]
        binary.symbols.add(matches[0])
        binary._ordered_symbols.append(matches[0])

    symbol = matches[0]
    location = state.address

    # If the linked_addr is equal to zero, it's an imported symbol which is by that time unresolved.
    # Don't write addend's there
    value = symbol.linked_addr + state.addend if symbol.linked_addr != 0 else 0x0

    # Determine where to write, what to write and how wide the written value is
    if state.binding_type == 1:  # POINTER
        fmt = "Q" if binary.arch.bits == 64 else "I"
    elif state.binding_type == 2:  # ABSOLUTE32
        location = location % (2 ** 32)
        value = value % (2 ** 32)
        fmt = "I"
    elif state.binding_type == 3:  # PCREL32
        # relative to the end of the 4 byte field that is written
        value = (value - (location + 4)) % (2 ** 32)
        location = location % (2 ** 32)
        fmt = "I"
    else:
        l.error("Unknown BIND_TYPE: %d", state.binding_type)
        raise CLEInvalidBinaryError()

    # arguments are ordered address, symbol name, value to match the format string
    l.debug("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value)
    binary.memory.store(
        AT.from_lva(location, binary).to_rva(),
        struct.pack(binary.struct_byteorder + fmt, value))
    symbol.bind_xrefs.append(location)
418