# -*-coding:utf8 -*-
# This file is part of Mach-O Loader for CLE.
# Contributed December 2016 by Fraunhofer SIT (https://www.sit.fraunhofer.de/en/) and updated in September 2019.

import logging
import struct
from typing import TYPE_CHECKING, Callable, Dict, Tuple

from .symbol import BindingSymbol
from ...errors import CLEInvalidBinaryError
from ...address_translator import AT

if TYPE_CHECKING:
    from ... import MachO

l = logging.getLogger(name=__name__)

# A binding opcode byte carries the opcode in its high nibble and an immediate in its low nibble.
OPCODE_MASK = 0xF0
IMM_MASK = 0x0F
BIND_TYPE_POINTER = 1
BIND_TYPE_TEXT_ABSOLUTE32 = 2
BIND_TYPE_TEXT_PCREL32 = 3
BIND_OPCODE_DONE = 0x00
BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10
BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20
BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30
BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40
BIND_OPCODE_SET_TYPE_IMM = 0x50
BIND_OPCODE_SET_ADDEND_SLEB = 0x60
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70
BIND_OPCODE_ADD_ADDR_ULEB = 0x80
BIND_OPCODE_DO_BIND = 0x90
BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0
BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0
BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0

if bytes is not str:
    def chh(x):
        """Return ``x`` unchanged; indexing ``bytes`` already yields an int here."""
        return x
else:
    chh = ord


def read_uleb(blob: bytes, offset: int) -> Tuple[int, int]:
    """Reads a number encoded as uleb128

    :param blob: buffer to read from
    :param offset: index of the first byte of the encoded number
    :return: tuple ``(value, number_of_bytes_consumed)``; reading stops silently at the end of ``blob``
    """
    result = 0
    shift = 0
    index = offset

    while index < len(blob):
        b = chh(blob[index])
        result |= ((b & 0x7f) << shift)
        shift += 7
        index += 1
        if b & 0x80 == 0:
            # high bit clear marks the last byte of the number
            break

    return result, index - offset


def read_sleb(blob: bytes, offset: int) -> Tuple[int, int]:
    """Reads a number encoded as sleb128

    :param blob: buffer to read from
    :param offset: index of the first byte of the encoded number
    :return: tuple ``(value, number_of_bytes_consumed)``; reading stops silently at the end of ``blob``
    """
    result = 0
    shift = 0
    index = offset

    while index < len(blob):
        b = chh(blob[index])
        result |= ((b & 0x7f) << shift)
        shift += 7
        index += 1
        if b & 0x80 == 0:
            if b & 0x40:
                # sign bit of the final byte is set: two's complement
                result -= (1 << shift)
            break

    return result, index - offset


class BindingState:
    """State object that the opcode handlers read and mutate while a binding blob is interpreted"""

    def __init__(self, is_64):
        """
        :param is_64: True if pointers are 8 bytes wide, False for 4 bytes
        """
        self.index = 0  # offset of the next byte to read in the opcode blob
        self.done = False  # set by BIND_OPCODE_DONE
        self.lib_ord = 0
        self.sym_name = ""
        self.sym_flags = 0
        self.binding_type = 0
        self.addend = 0
        self.segment_index = 0
        self.address = 0
        self.seg_end_address = 0  # TODO: no rebasing support
        # address is expected to properly overflow and address is uintptr_t
        # (unsigned long according to _uintptr_t.h)
        self.wraparound = 2 ** 64
        self.sizeof_intptr_t = 8 if is_64 else 4  # experimentally determined
        self.bind_handler = None  # function(state,binary) => None

    def add_address_ov(self, address, addend):
        """Store ``address + addend`` in ``self.address``, wrapping once at ``self.wraparound``.

        This is a very ugly kludge. It is needed because dyld relies on overflow
        semantics and represents several negative offsets through BIG ulebs.

        :param address: base address
        :param addend: value to add to ``address``
        """
        tmp = address + addend
        # ">=" (not ">"): a sum of exactly `wraparound` must wrap to 0 to stay inside the address range
        if tmp >= self.wraparound:
            tmp -= self.wraparound
        self.address = tmp

    def check_address_bounds(self):
        """Raise if the current address is at or beyond the end of the current segment

        :raises CLEInvalidBinaryError: if ``self.address >= self.seg_end_address``
        """
        if self.address >= self.seg_end_address:
            l.error("index %d: address >= seg_end_address (%#x >= %#x)",
                    self.index, self.address, self.seg_end_address)
            raise CLEInvalidBinaryError()


class BindingHelper:
    """Factors out binding logic from MachO.
    Intended to work in close conjunction with MachO not for standalone use"""
    binary: 'MachO'

    def __init__(self, binary):
        """
        :param binary: the MachO object whose segments, symbols and memory the bindings operate on
        """
        self.binary = binary

    def do_normal_bind(self, blob: bytes):
        """Performs non-lazy, non-weak bindings

        :param blob: Blob containing binding opcodes; ``None`` means there is nothing to bind
        """
        if blob is None:
            return  # skip

        l.debug("Binding non-lazy, non-weak symbols")
        s = BindingState(self.binary.arch.bits == 64)
        # seg_end_address for the initial segment (index 0) is set up by _do_bind_generic
        s.bind_handler = default_binding_handler
        self._do_bind_generic(blob, s, {
            BIND_OPCODE_DONE: n_opcode_done,
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: n_opcode_set_dylib_ordinal_imm,
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: n_opcode_set_dylib_ordinal_uleb,
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: n_opcode_set_dylib_special_imm,
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: n_opcode_set_trailing_flags_imm,
            BIND_OPCODE_SET_TYPE_IMM: n_opcode_set_type_imm,
            BIND_OPCODE_SET_ADDEND_SLEB: n_opcode_set_addend_sleb,
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: n_opcode_set_segment_and_offset_uleb,
            BIND_OPCODE_ADD_ADDR_ULEB: n_opcode_add_addr_uleb,
            BIND_OPCODE_DO_BIND: n_opcode_do_bind,
            BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: n_opcode_do_bind_add_addr_uleb,
            BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: n_opcode_do_bind_add_addr_imm_scaled,
            BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: n_opcode_do_bind_uleb_times_skipping_uleb,
        })

        l.debug("Done binding non-lazy, non-weak symbols ")

    def do_lazy_bind(self, blob):
        """
        Performs lazy binding

        :param blob: Blob containing lazy binding opcodes; ``None`` means there is nothing to bind
        """
        if blob is None:
            return  # skip
        l.debug("Binding lazy symbols")

        s = BindingState(self.binary.arch.bits == 64)
        s.bind_handler = default_binding_handler
        # The handler table does not change between entries, so build it once
        handlers = {
            BIND_OPCODE_DONE: n_opcode_done,
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: n_opcode_set_dylib_ordinal_imm,
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: n_opcode_set_dylib_ordinal_uleb,
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: n_opcode_set_dylib_special_imm,
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: n_opcode_set_trailing_flags_imm,
            BIND_OPCODE_SET_TYPE_IMM: n_opcode_set_type_imm,
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: l_opcode_set_segment_and_offset_uleb,
            BIND_OPCODE_DO_BIND: l_opcode_do_bind,
        }
        end = len(blob)
        # We need to iterate the iteration as every lazy binding entry ends with BIND_OPCODE_DONE
        while s.index < end:
            # re-initialise state (except index)
            s.binding_type = BIND_TYPE_POINTER
            s.address = 0
            s.sym_name = ""
            s.sym_flags = 0
            s.lib_ord = 0
            s.done = False
            s.addend = 0
            s.segment_index = 0
            s.seg_end_address = 0  # TODO: no rebasing support

            self._do_bind_generic(blob, s, handlers)

        l.debug("Done binding lazy symbols")

    def _do_bind_generic(self,
                         blob,
                         init_state: BindingState,
                         opcode_dict: Dict[int,
                                           Callable[[BindingState, 'MachO', int, bytes], BindingState]]
                         ) -> BindingState:
        """
        Does the actual binding work. Represents a generic framework for interpreting binding opcodes

        Runs until a handler sets ``done`` on the state or the end of ``blob`` is reached.
        Opcodes missing from ``opcode_dict`` are logged and skipped.

        :param blob: blob of binding opcodes
        :param init_state: Initial BindingState
        :param opcode_dict: Dictionary opcode=> handler
        :return: resulting binding state
        """
        s = init_state
        seg = self.binary.segments[s.segment_index]
        s.seg_end_address = seg.vaddr + seg.memsize  # TODO: no rebasing support
        end = len(blob)
        while not s.done and s.index < end:
            l.debug("Current address: %#x, blob index (offset): %#x", s.address, s.index)
            raw_opcode = blob[s.index]
            opcode = raw_opcode & OPCODE_MASK
            immediate = raw_opcode & IMM_MASK
            s.index += 1
            # NOTE(review): a KeyError raised inside a handler is also caught here and reported as an
            # invalid opcode; kept as-is to preserve the existing best-effort behaviour.
            try:
                s = opcode_dict[opcode](s, self.binary, immediate, blob)
            except KeyError:
                l.error("Invalid opcode for current binding: %#x", opcode)

        return s


# pylint: disable=unused-argument
# The following functions realize different variants of handling binding opcodes
# the format is def X(state,binary,immediate,blob) => state
def n_opcode_done(s: BindingState, _b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """BIND_OPCODE_DONE: mark the state as done so the interpreter loop stops"""
    l.debug("BIND_OPCODE_DONE @ %#x", s.index)
    s.done = True
    return s


def n_opcode_set_dylib_ordinal_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_DYLIB_ORDINAL_IMM: take the library ordinal from the immediate ``i``"""
    l.debug("SET_DYLIB_ORDINAL_IMM @ %#x: %d", s.index, i)
    s.lib_ord = i
    return s


def n_opcode_set_dylib_ordinal_uleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """SET_DYLIB_ORDINAL_ULEB: read the library ordinal as a uleb128 following the opcode"""
    value, consumed = read_uleb(blob, s.index)
    s.lib_ord = value
    # log the operand offset before advancing, like the other ULEB/SLEB handlers do
    l.debug("SET_DYLIB_ORDINAL_ULEB @ %#x: %d", s.index, s.lib_ord)
    s.index += consumed
    return s


def n_opcode_set_dylib_special_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_DYLIB_SPECIAL_IMM: set a zero or negative library ordinal from the immediate ``i``"""
    if i == 0:
        s.lib_ord = 0
    else:
        # sign-extend the 4-bit immediate: 0xF -> -1, 0xE -> -2, ...
        s.lib_ord = (i | OPCODE_MASK) - 256
    l.debug("SET_DYLIB_SPECIAL_IMM @ %#x: %d", s.index, s.lib_ord)
    return s


def n_opcode_set_trailing_flags_imm(s: BindingState, _b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SYMBOL_TRAILING_FLAGS_IMM: read a NUL-terminated symbol name; the immediate holds the flags

    :raises CLEInvalidBinaryError: if the name is not NUL-terminated within ``blob``
    """
    s.sym_flags = i

    start = s.index
    end = len(blob)
    pos = start
    while pos < end and blob[pos] != 0:
        pos += 1
    if pos >= end:
        # Running off the blob means the opcode stream is malformed; report it like the other
        # malformed-input paths instead of failing with a bare IndexError.
        l.error("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: unterminated symbol name", start)
        raise CLEInvalidBinaryError()

    # each byte maps to the character with the same code point
    s.sym_name = "".join(map(chr, blob[start:pos]))
    s.index = pos + 1  # move past 0 byte
    l.debug("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: %r,%#x", start, s.sym_name, s.sym_flags)
    return s


def n_opcode_set_type_imm(s: BindingState, _b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """SET_TYPE_IMM: take the binding type (one of BIND_TYPE_*) from the immediate ``i``"""
    # pylint: disable=unused-argument
    s.binding_type = i
    l.debug("SET_TYPE_IMM @ %#x: %d", s.index, s.binding_type)
    return s


def n_opcode_set_addend_sleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """SET_ADDEND_SLEB: read the (signed) addend as an sleb128 following the opcode"""
    value, consumed = read_sleb(blob, s.index)
    s.addend = value
    l.debug("SET_ADDEND_SLEB @ %#x: %d", s.index, s.addend)
    s.index += consumed
    return s


def n_opcode_set_segment_and_offset_uleb(s: BindingState, b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SEGMENT_AND_OFFSET_ULEB (non-lazy): select segment ``i`` and set the address to
    its vaddr plus a uleb128 offset; also updates the segment end used for bounds checks"""
    s.segment_index = i
    offset, consumed = read_uleb(blob, s.index)
    l.debug("(n)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, s.segment_index, offset)
    s.index += consumed
    seg = b.segments[s.segment_index]
    s.add_address_ov(seg.vaddr, offset)
    s.seg_end_address = seg.vaddr + seg.memsize

    return s


def l_opcode_set_segment_and_offset_uleb(s: BindingState, b: 'MachO', i: int, blob: bytes) -> BindingState:
    """SET_SEGMENT_AND_OFFSET_ULEB (lazy): set the address to the vaddr of segment ``i`` plus a
    uleb128 offset; unlike the non-lazy variant it leaves segment_index and seg_end_address alone"""
    offset, consumed = read_uleb(blob, s.index)
    l.debug("(l)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, i, offset)
    seg = b.segments[i]
    s.add_address_ov(seg.vaddr, offset)
    s.index += consumed
    return s


def n_opcode_add_addr_uleb(s: BindingState, _b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """ADD_ADDR_ULEB: advance the address by a uleb128 amount (with wraparound)"""
    delta, consumed = read_uleb(blob, s.index)
    s.add_address_ov(s.address, delta)
    l.debug("ADD_ADDR_ULEB @ %#x: %d", s.index, delta)
    s.index += consumed
    return s


def n_opcode_do_bind(s: BindingState, b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """DO_BIND (non-lazy): bounds-check, bind at the current address, then advance by one pointer

    :raises CLEInvalidBinaryError: if the address lies outside the current segment
    """
    l.debug("(n)DO_BIND @ %#x", s.index)
    s.check_address_bounds()
    s.bind_handler(s, b)
    s.add_address_ov(s.address, s.sizeof_intptr_t)
    return s


def l_opcode_do_bind(s: BindingState, b: 'MachO', _i: int, _blob: bytes) -> BindingState:
    """DO_BIND (lazy): bind at the current address; no bounds check and no address advance"""
    l.debug("(l)DO_BIND @ %#x", s.index)
    s.bind_handler(s, b)
    return s


def n_opcode_do_bind_add_addr_uleb(s: BindingState, b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """DO_BIND_ADD_ADDR_ULEB: bind, then advance by one pointer plus a uleb128 amount

    :raises CLEInvalidBinaryError: if the address lies outside the current segment
    """
    delta, consumed = read_uleb(blob, s.index)
    l.debug("DO_BIND_ADD_ADDR_ULEB @ %#x: %d", s.index, delta)
    if s.address >= s.seg_end_address:
        l.error("DO_BIND_ADD_ADDR_ULEB @ %#x: address >= seg_end_address (%#x>=%#x)",
                s.index, s.address, s.seg_end_address)
        raise CLEInvalidBinaryError()
    s.index += consumed
    s.bind_handler(s, b)
    # this is done AFTER binding in preparation for the NEXT step
    s.add_address_ov(s.address, delta + s.sizeof_intptr_t)
    return s


def n_opcode_do_bind_add_addr_imm_scaled(s: BindingState, b: 'MachO', i: int, _blob: bytes) -> BindingState:
    """DO_BIND_ADD_ADDR_IMM_SCALED: bind, then advance by ``i + 1`` pointers

    :raises CLEInvalidBinaryError: if the address lies outside the current segment
    """
    l.debug("DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: %d", s.index, i)
    if s.address >= s.seg_end_address:
        l.error("DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: address >= seg_end_address (%#x>=%#x)",
                s.index, s.address, s.seg_end_address)
        raise CLEInvalidBinaryError()
    s.bind_handler(s, b)
    # this is done AFTER binding in preparation for the NEXT step
    s.add_address_ov(s.address, (i * s.sizeof_intptr_t) + s.sizeof_intptr_t)
    return s


def n_opcode_do_bind_uleb_times_skipping_uleb(s: BindingState, b: 'MachO', _i: int, blob: bytes) -> BindingState:
    """DO_BIND_ULEB_TIMES_SKIPPING_ULEB: read ``count`` and ``skip`` (both uleb128), then bind
    ``count`` times, advancing by one pointer plus ``skip`` after each bind

    :raises CLEInvalidBinaryError: if any bind address lies outside the current segment
    """
    opcode_offset = s.index  # offset of the first operand, used for log messages
    count, count_len = read_uleb(blob, s.index)
    s.index += count_len
    skip, skip_len = read_uleb(blob, s.index)
    s.index += skip_len
    l.debug(
        "DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: %d,%d", opcode_offset, count, skip)
    for _ in range(count):
        if s.address >= s.seg_end_address:
            l.error("DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: address >= seg_end_address (%#x >= %#x)",
                    opcode_offset, s.address, s.seg_end_address)
            raise CLEInvalidBinaryError()
        s.bind_handler(s, b)
        s.add_address_ov(s.address, skip + s.sizeof_intptr_t)
    return s


# default binding handler
def default_binding_handler(state: BindingState, binary: 'MachO'):
    """Binds location to the symbol with the given name and library ordinal

    Looks the symbol up by ``state.sym_name`` / ``state.lib_ord`` (creating a BindingSymbol if none
    exists), writes the bound value into the binary's memory at ``state.address`` according to
    ``state.binding_type`` and records the location in the symbol's ``bind_xrefs``.

    :param state: current binding state
    :param binary: the MachO object being bound
    :raises CLEInvalidBinaryError: on an ambiguous symbol match or an unknown binding type
    """

    # locate the symbol:
    matches = binary.symbols.get_by_name_and_ordinal(state.sym_name, state.lib_ord)
    if len(matches) > 1:
        l.error("Cannot bind: More than one match for (%r,%d)", state.sym_name, state.lib_ord)
        raise CLEInvalidBinaryError()
    if len(matches) < 1:
        l.info("No match for (%r,%d), generating BindingSymbol ...", state.sym_name, state.lib_ord)
        matches = [BindingSymbol(binary, state.sym_name, state.lib_ord)]
        binary.symbols.add(matches[0])
        binary._ordered_symbols.append(matches[0])

    symbol = matches[0]
    location = state.address

    # If the linked_addr is equal to zero, it's an imported symbol which is by that time unresolved.
    # Don't write addend's there
    value = symbol.linked_addr + state.addend if symbol.linked_addr != 0 else 0x0

    if state.binding_type == BIND_TYPE_POINTER:
        l.debug("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value)
        binary.memory.store(
            AT.from_lva(location, binary).to_rva(),
            struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value))
        symbol.bind_xrefs.append(location)
    elif state.binding_type == BIND_TYPE_TEXT_ABSOLUTE32:
        location_32 = location % (2 ** 32)
        value_32 = value % (2 ** 32)
        # argument order must match the format string: address, symbol name, value
        l.debug("Updating address %#x with symobl %r @ %#x", location_32, state.sym_name, value_32)
        binary.memory.store(
            AT.from_lva(location_32, binary).to_rva(),
            struct.pack(binary.struct_byteorder + "I", value_32))
        symbol.bind_xrefs.append(location_32)
    elif state.binding_type == BIND_TYPE_TEXT_PCREL32:
        location_32 = location % (2 ** 32)
        # displacement is relative to the end of the 4-byte field being patched
        value_32 = (value - (location + 4)) % (2 ** 32)
        # argument order must match the format string: address, symbol name, value
        l.debug("Updating address %#x with symobl %r @ %#x", location_32, state.sym_name, value_32)
        binary.memory.store(
            AT.from_lva(location_32, binary).to_rva(),
            struct.pack(binary.struct_byteorder + "I", value_32))
        symbol.bind_xrefs.append(location_32)
    else:
        l.error("Unknown BIND_TYPE: %d", state.binding_type)
        raise CLEInvalidBinaryError()