1import os 2import sys 3import platform 4import logging 5from collections import OrderedDict 6from typing import Optional, List 7 8import archinfo 9from archinfo.arch_soot import ArchSoot 10 11from .address_translator import AT 12from .utils import ALIGN_UP, key_bisect_floor_key, key_bisect_insort_right 13 14try: 15 import claripy 16except ImportError: 17 claripy = None 18 19__all__ = ('Loader',) 20 21l = logging.getLogger(name=__name__) 22 23 24class Loader: 25 """ 26 The loader loads all the objects and exports an abstraction of the memory of the process. What you see here is an 27 address space with loaded and rebased binaries. 28 29 :param main_binary: The path to the main binary you're loading, or a file-like object with the binary 30 in it. 31 32 The following parameters are optional. 33 34 :param auto_load_libs: Whether to automatically load shared libraries that loaded objects depend on. 35 :param load_debug_info: Whether to automatically parse DWARF data and search for debug symbol files. 36 :param concrete_target: Whether to instantiate a concrete target for a concrete execution of the process. 37 if this is the case we will need to instantiate a SimConcreteEngine that wraps the 38 ConcreteTarget provided by the user. 39 :param force_load_libs: A list of libraries to load regardless of if they're required by a loaded object. 40 :param skip_libs: A list of libraries to never load, even if they're required by a loaded object. 41 :param main_opts: A dictionary of options to be used loading the main binary. 42 :param lib_opts: A dictionary mapping library names to the dictionaries of options to be used when 43 loading them. 44 :param ld_path: A list of paths in which we can search for shared libraries. 45 :param use_system_libs: Whether or not to search the system load path for requested libraries. Default True. 46 :param ignore_import_version_numbers: 47 Whether libraries with different version numbers in the filename will be considered 48 equivalent, for example libc.so.6 and libc.so.0 49 :param case_insensitive: If this is set to True, filesystem loads will be done case-insensitively regardless of 50 the case-sensitivity of the underlying filesystem. 51 :param rebase_granularity: The alignment to use for rebasing shared objects 52 :param except_missing_libs: Throw an exception when a shared library can't be found. 53 :param aslr: Load libraries in symbolic address space. Do not use this option. 54 :param page_size: The granularity with which data is mapped into memory. Set to 1 if you are working 55 in a non-paged environment. 56 :param preload_libs: Similar to `force_load_libs` but will provide for symbol resolution, with precedence 57 over any dependencies. 58 :ivar memory: The loaded, rebased, and relocated memory of the program. 59 :vartype memory: cle.memory.Clemory 60 :ivar main_object: The object representing the main binary (i.e., the executable). 61 :ivar shared_objects: A dictionary mapping loaded library names to the objects representing them. 62 :ivar all_objects: A list containing representations of all the different objects loaded. 63 :ivar requested_names: A set containing the names of all the different shared libraries that were marked as a 64 dependency by somebody. 65 :ivar initial_load_objects: A list of all the objects that were loaded as a result of the initial load request. 66 67 When reference is made to a dictionary of options, it requires a dictionary with zero or more of the following keys: 68 69 - backend : "elf", "pe", "mach-o", "blob" : which loader backend to use 70 - arch : The archinfo.Arch object to use for the binary 71 - base_addr : The address to rebase the object at 72 - entry_point : The entry point to use for the object 73 74 More keys are defined on a per-backend basis. 75 """ 76 # _main_binary_path: str 77 memory: Optional['Clemory'] 78 main_object: Optional['Backend'] 79 tls: Optional['ThreadManager'] 80 81 def __init__(self, main_binary, auto_load_libs=True, concrete_target = None, 82 force_load_libs=(), skip_libs=(), 83 main_opts=None, lib_opts=None, ld_path=(), use_system_libs=True, 84 ignore_import_version_numbers=True, case_insensitive=False, rebase_granularity=0x100000, 85 except_missing_libs=False, aslr=False, perform_relocations=True, load_debug_info=False, 86 page_size=0x1, preload_libs=(), arch=None): 87 if hasattr(main_binary, 'seek') and hasattr(main_binary, 'read'): 88 self._main_binary_path = None 89 self._main_binary_stream = main_binary 90 else: 91 self._main_binary_path = os.path.realpath(str(main_binary)) 92 self._main_binary_stream = None 93 94 # whether we are presently in the middle of a load cycle 95 self._juggling = False 96 97 # auto_load_libs doesn't make any sense if we have a concrete target. 98 if concrete_target: 99 auto_load_libs = False 100 101 self._auto_load_libs = auto_load_libs 102 self._load_debug_info = load_debug_info 103 self._satisfied_deps = dict((x, False) for x in skip_libs) 104 self._main_opts = {} if main_opts is None else main_opts 105 self._lib_opts = {} if lib_opts is None else lib_opts 106 self._custom_ld_path = [ld_path] if type(ld_path) is str else ld_path 107 force_load_libs = [force_load_libs] if type(force_load_libs) is str else force_load_libs 108 preload_libs = [preload_libs] if type(preload_libs) is str else preload_libs 109 self._use_system_libs = use_system_libs 110 self._ignore_import_version_numbers = ignore_import_version_numbers 111 self._case_insensitive = case_insensitive 112 self._rebase_granularity = rebase_granularity 113 self._except_missing_libs = except_missing_libs 114 self._relocated_objects = set() 115 self._perform_relocations = perform_relocations 116 117 # case insensitivity setup 118 if sys.platform == 'win32': # TODO: a real check for case insensitive filesystems 119 if self._main_binary_path: self._main_binary_path = self._main_binary_path.lower() 120 force_load_libs = [x.lower() if type(x) is str else x for x in force_load_libs] 121 for x in list(self._satisfied_deps): self._satisfied_deps[x.lower()] = self._satisfied_deps[x] 122 for x in list(self._lib_opts): self._lib_opts[x.lower()] = self._lib_opts[x] 123 self._custom_ld_path = [x.lower() for x in self._custom_ld_path] 124 125 self.aslr = aslr 126 self.page_size = page_size 127 self.memory = None 128 self.main_object = None 129 self.tls = None 130 self._kernel_object = None # type: Optional[KernelObject] 131 self._extern_object = None # type: Optional[ExternObject] 132 self.shared_objects = OrderedDict() 133 self.all_objects = [] # type: List[Backend] 134 self.requested_names = set() 135 if arch is not None: 136 self._main_opts.update({'arch': arch}) 137 self.preload_libs = [] 138 self.initial_load_objects = self._internal_load(main_binary, *preload_libs, *force_load_libs, preloading=(main_binary, *preload_libs)) 139 140 # cache 141 self._last_object = None 142 143 if self._extern_object and self._extern_object._warned_data_import: 144 l.warning('For more information about "Symbol was allocated without a known size", see https://docs.angr.io/extending-angr/environment#simdata') 145 146 # Basic functions and properties 147 148 def close(self): 149 l.warning("You don't need to close the loader anymore :)") 150 151 def __repr__(self): 152 if self._main_binary_stream is None: 153 return '<Loaded %s, maps [%#x:%#x]>' % (os.path.basename(self._main_binary_path), self.min_addr, self.max_addr) 154 else: 155 return '<Loaded from stream, maps [%#x:%#x]>' % (self.min_addr, self.max_addr) 156 157 @property 158 def max_addr(self): 159 """ 160 The maximum address loaded as part of any loaded object (i.e., the whole address space). 161 """ 162 return self.all_objects[-1].max_addr 163 164 @property 165 def min_addr(self): 166 """ 167 The minimum address loaded as part of any loaded object (i.e., the whole address space). 168 """ 169 return self.all_objects[0].min_addr 170 171 @property 172 def initializers(self): 173 """ 174 Return a list of all the initializers that should be run before execution reaches the entry point, in the order 175 they should be run. 176 """ 177 return sum((x.initializers for x in self.all_objects), []) 178 179 @property 180 def finalizers(self): 181 """ 182 Return a list of all the finalizers that should be run before the program exits. 183 I'm not sure what order they should be run in. 184 """ 185 return sum((x.finalizers for x in self.all_objects), []) 186 187 @property 188 def linux_loader_object(self): 189 """ 190 If the linux dynamic loader is present in memory, return it 191 """ 192 for obj in self.all_objects: 193 if obj.provides is None: 194 continue 195 if self._is_linux_loader_name(obj.provides) is True: 196 return obj 197 return None 198 199 @property 200 def extern_object(self): 201 """ 202 Return the extern object used to provide addresses to unresolved symbols and angr internals. 203 204 Accessing this property will load this object into memory if it was not previously present. 205 206 proposed model for how multiple extern objects should work: 207 208 1) extern objects are a linked list. the one in loader._extern_object is the head of the list 209 2) each round of explicit loads generates a new extern object if it has unresolved dependencies. this object 210 has exactly the size necessary to hold all its exports. 211 3) All requests for size are passed down the chain until they reach an object which has the space to service it 212 or an object which has not yet been mapped. If all objects have been mapped and are full, a new extern object 213 is mapped with a fixed size. 214 """ 215 if self._extern_object is None: 216 if self.main_object.arch.bits < 32: 217 extern_size = 0x200 218 elif self.main_object.arch.bits == 32: 219 extern_size = 0x8000 220 else: 221 extern_size = 0x80000 222 self._extern_object = ExternObject(self, map_size=extern_size) 223 self._internal_load(self._extern_object) 224 return self._extern_object 225 226 @property 227 def kernel_object(self) -> 'KernelObject': 228 """ 229 Return the object used to provide addresses to syscalls. 230 231 Accessing this property will load this object into memory if it was not previously present. 232 """ 233 if self._kernel_object is None: 234 self._kernel_object = KernelObject(self) 235 self._map_object(self._kernel_object) 236 return self._kernel_object 237 238 @property 239 def all_elf_objects(self): 240 """ 241 Return a list of every object that was loaded from an ELF file. 242 """ 243 return [o for o in self.all_objects if isinstance(o, MetaELF)] 244 245 @property 246 def all_pe_objects(self): 247 """ 248 Return a list of every object that was loaded from an ELF file. 249 """ 250 return [o for o in self.all_objects if isinstance(o, PE)] 251 252 @property 253 def missing_dependencies(self): 254 """ 255 Return a set of every name that was requested as a shared object dependency but could not be loaded 256 """ 257 return self.requested_names - set(k for k,v in self._satisfied_deps.items() if v is not False) 258 259 @property 260 def auto_load_libs(self): 261 return self._auto_load_libs 262 263 def describe_addr(self, addr): 264 """ 265 Returns a textual description of what's in memory at the provided address 266 """ 267 o = self.find_object_containing(addr) 268 269 if o is None: 270 return 'not part of a loaded object' 271 272 options = [] 273 274 rva = AT.from_va(addr, o).to_rva() 275 276 idx = o.symbols.bisect_key_right(rva) - 1 277 while idx >= 0: 278 sym = o.symbols[idx] 279 if not sym.name or sym.is_import: 280 idx -= 1 281 continue 282 options.append((sym.relative_addr, '%s+' % sym.name)) 283 break 284 285 if isinstance(o, ELF): 286 try: 287 plt_addr, plt_name = max((a, n) for n, a in o._plt.items() if a <= rva) 288 except ValueError: 289 pass 290 else: 291 options.append((plt_addr, 'PLT.%s+' % plt_name)) 292 293 options.append((0, 'offset ')) 294 295 if o.provides: 296 objname = o.provides 297 elif o.binary: 298 objname = os.path.basename(o.binary) 299 elif self.main_object is o: 300 objname = 'main binary' 301 else: 302 objname = 'object loaded from stream' 303 304 best_offset, best_prefix = max(options, key=lambda v: v[0]) 305 return '%s%#x in %s (%#x)' % (best_prefix, rva - best_offset, objname, AT.from_va(addr, o).to_lva()) 306 307 # Search functions 308 309 def find_object(self, spec, extra_objects=()): 310 """ 311 If the given library specification has been loaded, return its object, otherwise return None. 312 """ 313 if isinstance(spec, Backend): 314 for obj in self.all_objects: 315 if obj is spec: 316 return obj 317 return None 318 319 if self._case_insensitive: 320 spec = spec.lower() 321 extra_idents = {} 322 for obj in extra_objects: 323 for ident in self._possible_idents(obj): 324 extra_idents[ident] = obj 325 326 for ident in self._possible_idents(spec): 327 if ident in self._satisfied_deps: 328 return self._satisfied_deps[ident] 329 if ident in extra_idents: 330 return extra_idents[ident] 331 332 return None 333 334 def find_object_containing(self, addr, membership_check=True): 335 """ 336 Return the object that contains the given address, or None if the address is unmapped. 337 338 :param int addr: The address that should be contained in the object. 339 :param bool membership_check: Whether a membership check should be performed or not (True by default). This 340 option can be set to False if you are certain that the target object does not 341 have "holes". 342 :return: The object or None. 343 """ 344 345 def _check_object_memory(obj_): 346 if isinstance(obj_.memory, Clemory): 347 if AT.from_va(addr, obj_).to_rva() in obj_.memory: 348 self._last_object = obj_ 349 return obj_ 350 return None 351 elif type(obj_.memory) is str: 352 self._last_object = obj_ 353 return obj_ 354 else: 355 raise CLEError('Unsupported memory type %s' % type(obj_.memory)) 356 357 # check the cache first 358 if self._last_object is not None and \ 359 self._last_object.min_addr <= addr <= self._last_object.max_addr: 360 if not membership_check: return self._last_object 361 if not self._last_object.has_memory: return self._last_object 362 o = _check_object_memory(self._last_object) 363 if o: return o 364 365 if addr > self.max_addr or addr < self.min_addr: 366 return None 367 368 obj = key_bisect_floor_key(self.all_objects, addr, keyfunc=lambda obj: obj.min_addr) 369 if obj is None: 370 return None 371 if not obj.min_addr <= addr <= obj.max_addr: 372 return None 373 if not membership_check: 374 self._last_object = obj 375 return obj 376 if not obj.has_memory: 377 self._last_object = obj 378 return obj 379 return _check_object_memory(obj) 380 381 def find_segment_containing(self, addr, skip_pseudo_objects=True): 382 """ 383 Find the section object that the address belongs to. 384 385 :param int addr: The address to test 386 :param bool skip_pseudo_objects: Skip objects that CLE adds during loading. 387 :return: The section that the address belongs to, or None if the address does not belong to any section, or if 388 section information is not available. 389 :rtype: cle.Segment 390 """ 391 392 obj = self.find_object_containing(addr, membership_check=False) 393 394 if obj is None: 395 return None 396 397 if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)): 398 # the address is from a section allocated by angr. 399 return None 400 401 return obj.find_segment_containing(addr) 402 403 def find_section_containing(self, addr, skip_pseudo_objects=True): 404 """ 405 Find the section object that the address belongs to. 406 407 :param int addr: The address to test. 408 :param bool skip_pseudo_objects: Skip objects that CLE adds during loading. 409 :return: The section that the address belongs to, or None if the address does not belong to any section, or if 410 section information is not available. 411 :rtype: cle.Section 412 """ 413 414 obj = self.find_object_containing(addr, membership_check=False) 415 416 if obj is None: 417 return None 418 419 if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)): 420 # the address is from a special CLE section 421 return None 422 423 return obj.find_section_containing(addr) 424 425 def find_section_next_to(self, addr, skip_pseudo_objects=True): 426 """ 427 Find the next section after the given address. 428 429 :param int addr: The address to test. 430 :param bool skip_pseudo_objects: Skip objects that CLE adds during loading. 431 :return: The next section that goes after the given address, or None if there is no section after the address, 432 or if section information is not available. 433 :rtype: cle.Section 434 """ 435 436 obj = self.find_object_containing(addr, membership_check=False) 437 438 if obj is None: 439 return None 440 441 if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)): 442 # the address is from a special CLE section 443 return None 444 445 return obj.sections.find_region_next_to(addr) 446 447 def find_symbol(self, thing, fuzzy=False): 448 """ 449 Search for the symbol with the given name or address. 450 451 :param thing: Either the name or address of a symbol to look up 452 :param fuzzy: Set to True to return the first symbol before or at the given address 453 454 :returns: A :class:`cle.backends.Symbol` object if found, None otherwise. 455 """ 456 if type(thing) is archinfo.arch_soot.SootAddressDescriptor: 457 # Soot address 458 return thing.method.fullname 459 elif type(thing) is int: 460 # address 461 if fuzzy: 462 so = self.find_object_containing(thing) 463 if so is None: 464 return None 465 objs = [so] 466 else: 467 objs = self.all_objects 468 469 for so in objs: 470 idx = so.symbols.bisect_key_right(AT.from_mva(thing, so).to_rva()) - 1 471 while idx >= 0 and (fuzzy or so.symbols[idx].rebased_addr == thing): 472 if so.symbols[idx].is_import: 473 idx -= 1 474 continue 475 return so.symbols[idx] 476 else: 477 # name 478 for so in self.all_objects: 479 if so is self._extern_object: 480 continue 481 sym = so.get_symbol(thing) 482 if sym is None: 483 continue 484 485 if sym.is_import: 486 if sym.resolvedby is not None: 487 if sym.resolvedby.is_forward and sym.resolvedby.resolvedby is not None: 488 return sym.resolvedby.resolvedby 489 return sym.resolvedby 490 else: 491 if sym.is_forward and sym.resolvedby is not None: 492 return sym.resolvedby 493 return sym 494 495 if self._extern_object is not None: 496 sym = self.extern_object.get_symbol(thing) 497 if sym is not None: 498 return sym 499 500 return None 501 502 @property 503 def symbols(self): 504 peeks = [] 505 for so in self.all_objects: 506 if so.symbols: 507 i = iter(so.symbols) 508 n = next(i) 509 peeks.append((n, i)) 510 while peeks: 511 element = min(peeks, key=lambda x: x[0].rebased_addr) # if we don't do this it might crash on comparing iterators 512 n, i = element 513 idx = peeks.index(element) 514 yield n 515 try: 516 peeks[idx] = next(i), i 517 except StopIteration: 518 peeks.pop(idx) 519 520 def find_all_symbols(self, name, exclude_imports=True, exclude_externs=False, exclude_forwards=True): 521 """ 522 Iterate over all symbols present in the set of loaded binaries that have the given name 523 524 :param name: The name to search for 525 :param exclude_imports: Whether to exclude import symbols. Default True. 526 :param exclude_externs: Whether to exclude symbols in the extern object. Default False. 527 :param exclude_forwards: Whether to exclude forward symbols. Default True. 528 """ 529 for so in self.all_objects: 530 sym = so.get_symbol(name) 531 if sym is None: 532 continue 533 if sym.is_import and exclude_imports: 534 continue 535 if sym.owner is self._extern_object and exclude_externs: 536 continue 537 if sym.is_forward and exclude_forwards: 538 continue 539 540 yield sym 541 542 def find_plt_stub_name(self, addr): 543 """ 544 Return the name of the PLT stub starting at ``addr``. 545 """ 546 so = self.find_object_containing(addr) 547 if so is not None and isinstance(so, MetaELF): 548 return so.reverse_plt.get(addr, None) 549 return None 550 551 def find_relevant_relocations(self, name): 552 """ 553 Iterate through all the relocations referring to the symbol with the given ``name`` 554 """ 555 for so in self.all_objects: 556 for reloc in so.relocs: 557 if reloc.symbol is not None: 558 if reloc.symbol.name == name: 559 yield reloc 560 561 # Complicated stuff 562 563 def perform_irelative_relocs(self, resolver_func): 564 """ 565 Use this method to satisfy ``IRelative`` relocations in the binary that require execution of loaded code. 566 567 Note that this does NOT handle ``IFunc`` symbols, which must be handled separately. (this could be changed, but 568 at the moment it's desirable to support lazy IFunc resolution, since emulation is usually slow) 569 570 :param resolver_func: A callback function that takes an address, runs the code at that address, and returns 571 the return value from the emulated function. 572 """ 573 for obj in self.all_objects: 574 for resolver, dest in obj.irelatives: 575 val = resolver_func(resolver) 576 if val is not None: 577 obj.memory.pack_word(dest, val) 578 579 def dynamic_load(self, spec): 580 """ 581 Load a file into the address space. Note that the sematics of ``auto_load_libs`` and ``except_missing_libs`` 582 apply at all times. 583 584 :param spec: The path to the file to load. May be an absolute path, a relative path, or a name to search in 585 the load path. 586 587 :return: A list of all the objects successfully loaded, which may be empty if this object was previously 588 loaded. If the object specified in ``spec`` failed to load for any reason, including the file 589 not being found, return None. 590 """ 591 try: 592 return self._internal_load(spec) 593 except CLEFileNotFoundError as e: 594 l.warning("Dynamic load failed: %r", e) 595 return None 596 597 def get_loader_symbolic_constraints(self): 598 """ 599 Do not use this method. 600 """ 601 if not self.aslr: 602 return [] 603 if not claripy: 604 l.error("Please install claripy to get symbolic constraints") 605 return [] 606 outputlist = [] 607 for obj in self.all_objects: 608 #TODO Fix Symbolic for tls whatever 609 if obj.aslr and isinstance(obj.mapped_base_symbolic, claripy.ast.BV): 610 outputlist.append(obj.mapped_base_symbolic == obj.mapped_base) 611 return outputlist 612 613 614 # Private stuff 615 616 @staticmethod 617 def _is_linux_loader_name(name): 618 """ 619 ld can have different names such as ld-2.19.so or ld-linux-x86-64.so.2 depending on symlinks and whatnot. 620 This determines if `name` is a suitable candidate for ld. 621 """ 622 return 'ld.so' in name or 'ld64.so' in name or 'ld-linux' in name 623 624 def _internal_load(self, *args, preloading=()): 625 """ 626 Pass this any number of files or libraries to load. If it can't load any of them for any reason, it will 627 except out. Note that the semantics of ``auto_load_libs`` and ``except_missing_libs`` apply at all times. 628 629 It will return a list of all the objects successfully loaded, which may be smaller than the list you provided 630 if any of them were previously loaded. 631 632 The ``main_binary`` has to come first, followed by any additional libraries to load this round. To create the 633 effect of "preloading", i.e. ensuring symbols are resolved to preloaded libraries ahead of any others, pass 634 ``preloading`` as a list of identifiers which should be considered preloaded. Note that the identifiers will 635 be compared using object identity. 636 """ 637 # ideal loading pipeline: 638 # - load everything, independently and recursively until dependencies are satisfied 639 # - resolve symbol-based dependencies 640 # - layout address space, including (as a prerequisite) coming up with the layout for tls and externs 641 # - map everything into memory 642 # - perform relocations 643 644 # STEP 1 645 # Load everything. for each binary, load it in isolation so we end up with a Backend instance. 646 # If auto_load_libs is on, do this iteratively until all dependencies is satisfied 647 objects = [] 648 preload_objects = [] 649 dependencies = [] 650 cached_failures = set() # this assumes that the load path is global and immutable by the time we enter this func 651 652 for main_spec in args: 653 is_preloading = any(spec is main_spec for spec in preloading) 654 if self.find_object(main_spec, extra_objects=objects) is not None: 655 l.info("Skipping load request %s - already loaded", main_spec) 656 continue 657 obj = self._load_object_isolated(main_spec) 658 objects.append(obj) 659 objects.extend(obj.child_objects) 660 dependencies.extend(obj.deps) 661 662 if self.main_object is None: 663 # this is technically the first place we can start to initialize things based on platform 664 self.main_object = obj 665 self.memory = Clemory(obj.arch, root=True) 666 667 chk_obj = self.main_object if isinstance(self.main_object, ELFCore) or not self.main_object.child_objects else self.main_object.child_objects[0] 668 if isinstance(chk_obj, ELFCore): 669 self.tls = ELFCoreThreadManager(self, obj.arch) 670 elif isinstance(obj, Minidump): 671 self.tls = MinidumpThreadManager(self, obj.arch) 672 elif isinstance(chk_obj, MetaELF): 673 self.tls = ELFThreadManager(self, obj.arch) 674 elif isinstance(chk_obj, PE): 675 self.tls = PEThreadManager(self, obj.arch) 676 else: 677 self.tls = ThreadManager(self, obj.arch) 678 679 elif is_preloading: 680 self.preload_libs.append(obj) 681 preload_objects.append(obj) 682 683 684 while self._auto_load_libs and dependencies: 685 spec = dependencies.pop(0) 686 if spec in cached_failures: 687 l.debug("Skipping implicit dependency %s - cached failure", spec) 688 continue 689 if self.find_object(spec, extra_objects=objects) is not None: 690 l.debug("Skipping implicit dependency %s - already loaded", spec) 691 continue 692 693 try: 694 l.info("Loading %s...", spec) 695 obj = self._load_object_isolated(spec) # loading dependencies 696 except CLEFileNotFoundError: 697 l.info("... not found") 698 cached_failures.add(spec) 699 if self._except_missing_libs: 700 raise 701 continue 702 703 objects.append(obj) 704 objects.extend(obj.child_objects) 705 dependencies.extend(obj.deps) 706 707 if type(self.tls) is ThreadManager: # ... java 708 if isinstance(obj, MetaELF): 709 self.tls = ELFThreadManager(self, obj.arch) 710 elif isinstance(obj, PE): 711 self.tls = PEThreadManager(self, obj.arch) 712 713 # STEP 1.5 714 # produce dependency-ordered list of objects and soname map 715 716 ordered_objects = [] 717 soname_mapping = OrderedDict((obj.provides if not self._ignore_import_version_numbers else obj.provides.rstrip('.0123456789'), obj) for obj in objects if obj.provides) 718 seen = set() 719 def visit(obj): 720 if id(obj) in seen: 721 return 722 seen.add(id(obj)) 723 724 stripped_deps = [dep if not self._ignore_import_version_numbers else dep.rstrip('.0123456789') for dep in obj.deps] 725 dep_objs = [soname_mapping[dep_name] for dep_name in stripped_deps if dep_name in soname_mapping] 726 for dep_obj in dep_objs: 727 visit(dep_obj) 728 729 ordered_objects.append(obj) 730 731 for obj in preload_objects + objects: 732 visit(obj) 733 734 # STEP 2 735 # Resolve symbol dependencies. Create an unmapped extern object, which may not be used 736 # after this step, everything should have the appropriate references to each other and the extern 737 # object should have all the space it needs allocated 738 739 extern_obj = ExternObject(self) 740 741 # tls registration 742 for obj in objects: 743 self.tls.register_object(obj) 744 745 # link everything 746 if self._perform_relocations: 747 for obj in ordered_objects: 748 l.info("Linking %s", obj.binary) 749 sibling_objs = list(obj.parent_object.child_objects) if obj.parent_object is not None else [] 750 stripped_deps = [dep if not self._ignore_import_version_numbers else dep.rstrip('.0123456789') for dep in obj.deps] 751 dep_objs = [soname_mapping[dep_name] for dep_name in stripped_deps if dep_name in soname_mapping] 752 main_objs = [self.main_object] if self.main_object is not obj else [] 753 for reloc in obj.relocs: 754 reloc.resolve_symbol(main_objs + preload_objects + sibling_objs + dep_objs + [obj], extern_object=extern_obj) 755 756 # if the extern object was used, add it to the list of objects we're mapping 757 # also add it to the linked list of extern objects 758 if extern_obj.map_size: 759 # resolve the extern relocs this way because they may produce more relocations as we go 760 i = 0 761 while i < len(extern_obj.relocs): 762 extern_obj.relocs[i].resolve_symbol(objects, extern_object=extern_obj) 763 i += 1 764 765 objects.append(extern_obj) 766 ordered_objects.insert(0, extern_obj) 767 extern_obj._next_object = self._extern_object 768 self._extern_object = extern_obj 769 770 extern_obj._finalize_tls() 771 self.tls.register_object(extern_obj) 772 773 # STEP 3 774 # Map everything to memory 775 for obj in objects: 776 self._map_object(obj) 777 778 # STEP 4 779 # Perform relocations 780 if self._perform_relocations: 781 for obj in ordered_objects: 782 obj.relocate() 783 784 # Step 5 785 # Insert each object into the appropriate mappings for lookup by name 786 for obj in objects: 787 self.requested_names.update(obj.deps) 788 for ident in self._possible_idents(obj): 789 self._satisfied_deps[ident] = obj 790 791 if obj.provides is not None: 792 self.shared_objects[obj.provides] = obj 793 794 return objects 795 796 def _load_object_isolated(self, spec): 797 """ 798 Given a partial specification of a dependency, this will return the loaded object as a backend instance. 799 It will not touch any loader-global data. 800 """ 801 # STEP 1: identify file 802 if isinstance(spec, Backend): 803 return spec 804 elif hasattr(spec, 'read') and hasattr(spec, 'seek'): 805 binary_stream = spec 806 binary = None 807 close = False 808 elif type(spec) in (bytes, str): 809 binary = self._search_load_path(spec) # this is allowed to cheat and do partial static loading 810 l.debug("... using full path %s", binary) 811 binary_stream = open(binary, 'rb') 812 close = True 813 else: 814 raise CLEError("Bad library specification: %s" % spec) 815 816 try: 817 # STEP 2: collect options 818 if self.main_object is None: 819 options = dict(self._main_opts) 820 else: 821 for ident in self._possible_idents(binary_stream if binary is None else binary): # also allowed to cheat 822 if ident in self._lib_opts: 823 options = dict(self._lib_opts[ident]) 824 break 825 else: 826 options = {} 827 828 # STEP 3: identify backend 829 backend_spec = options.pop('backend', None) 830 backend_cls = self._backend_resolver(backend_spec) 831 if backend_cls is None: 832 backend_cls = self._static_backend(binary_stream if binary is None else binary) 833 if backend_cls is None: 834 raise CLECompatibilityError("Unable to find a loader backend for %s. Perhaps try the 'blob' loader?" % spec) 835 836 # STEP 4: LOAD! 837 l.debug("... loading with %s", backend_cls) 838 839 result = backend_cls(binary, binary_stream, is_main_bin=self.main_object is None, loader=self, **options) 840 result.close() 841 return result 842 finally: 843 if close: 844 binary_stream.close() 845 846 def _map_object(self, obj): 847 """ 848 This will integrate the object into the global address space, but will not perform relocations. 849 """ 850 obj_size = obj.max_addr - obj.min_addr + 1 851 852 if obj.pic: 853 if obj._custom_base_addr is not None and self._is_range_free(obj._custom_base_addr, obj_size): 854 base_addr = obj._custom_base_addr 855 elif obj.linked_base and self._is_range_free(obj.linked_base, obj_size): 856 base_addr = obj.linked_base 857 elif not obj.is_main_bin: 858 base_addr = self._find_safe_rebase_addr(obj_size) 859 else: 860 l.warning("The main binary is a position-independent executable. " 861 "It is being loaded with a base address of 0x400000.") 862 base_addr = 0x400000 863 864 obj.rebase(base_addr) 865 else: 866 if obj._custom_base_addr is not None and not isinstance(obj, Blob): 867 l.warning("%s: base_addr was specified but the object is not PIC. " 868 "specify force_rebase=True to override", obj.binary_basename) 869 base_addr = obj.linked_base 870 if not self._is_range_free(obj.linked_base, obj_size): 871 raise CLEError("Position-DEPENDENT object %s cannot be loaded at %#x"% (obj.binary, base_addr)) 872 873 assert obj.mapped_base >= 0 874 875 if obj.has_memory: 876 assert obj.min_addr <= obj.max_addr 877 l.info("Mapping %s at %#x", obj.binary, base_addr) 878 self.memory.add_backer(base_addr, obj.memory) 879 obj._is_mapped = True 880 key_bisect_insort_right(self.all_objects, obj, keyfunc=lambda o: o.min_addr) 881 882 # Address space management 883 884 def _find_safe_rebase_addr(self, size): 885 """ 886 Return a "safe" virtual address to map an object of size ``size``, i.e. one that won't 887 overlap with anything already loaded. 888 """ 889 # this assumes that self.main_object exists, which should... definitely be safe 890 if self.main_object.arch.bits < 32 or self.main_object.max_addr >= 2**(self.main_object.arch.bits-1): 891 # HACK: On small arches, we should be more aggressive in packing stuff in. 892 gap_start = 0 893 else: 894 gap_start = ALIGN_UP(self.main_object.max_addr + 1, self._rebase_granularity) 895 for o in self.all_objects: 896 if gap_start + size <= o.min_addr: 897 break 898 else: 899 gap_start = ALIGN_UP(o.max_addr + 1, self._rebase_granularity) 900 901 if gap_start + size >= 2**self.main_object.arch.bits: 902 raise CLEOperationError("Ran out of room in address space") 903 904 return gap_start 905 906 def _is_range_free(self, va, size): 907 # self.main_object should not be None here 908 if va < 0 or va + size >= 2**self.main_object.arch.bits: 909 return False 910 911 for o in self.all_objects: 912 if o.min_addr <= va <= o.max_addr or va <= o.min_addr < va + size: 913 return False 914 915 return True 916 917 # Functions of the form "use some heuristic to tell me about this spec" 918 919 def _search_load_path(self, spec): 920 """ 921 This will return the most likely full path that could satisfy the given partial specification. 922 923 It will prefer files of a known filetype over files of an unknown filetype. 924 """ 925 # this could be converted to being an iterator pretty easily 926 for path in self._possible_paths(spec): 927 if self.main_object is not None: 928 backend_cls = self._static_backend(path) 929 if backend_cls is None: 930 continue 931 # If arch of main object is Soot ... 932 if isinstance(self.main_object.arch, ArchSoot): 933 # ... skip compatibility check, since it always evaluates to false 934 # with native libraries (which are the only valid dependencies) 935 return path 936 if not backend_cls.check_compatibility(path, self.main_object): 937 continue 938 939 return path 940 941 raise CLEFileNotFoundError("Could not find file %s" % spec) 942 943 def _possible_paths(self, spec): 944 """ 945 This iterates through each possible path that could possibly be used to satisfy the specification. 946 947 The only check performed is whether the file exists or not. 948 """ 949 dirs = [] 950 dirs.extend(self._custom_ld_path) # if we say dirs = blah, we modify the original 951 952 if self.main_object is not None: 953 # add path of main binary 954 if self.main_object.binary is not None: 955 dirs.append(os.path.dirname(self.main_object.binary)) 956 # if arch of main_object is Soot ... 957 is_arch_soot = isinstance(self.main_object.arch, ArchSoot) 958 if is_arch_soot: 959 # ... extend with load path of native libraries 960 dirs.extend(self.main_object.extra_load_path) 961 if self._use_system_libs: 962 l.debug("Path to system libraries (usually added as dependencies of JNI libs) needs " 963 "to be specified manually, by using the custom_ld_path option.") 964 # add path of system libraries 965 if self._use_system_libs and not is_arch_soot: 966 # Ideally this should be taken into account for each shared 967 # object, not just the main object. 968 dirs.extend(self.main_object.extra_load_path) 969 if sys.platform.startswith('linux'): 970 dirs.extend(self.main_object.arch.library_search_path()) 971 elif sys.platform == 'win32': 972 native_dirs = os.environ['PATH'].split(';') 973 974 # simulate the wow64 filesystem redirect, working around the fact that WE may be impacted by it as 975 # a 32-bit python process....... 976 python_is_32bit = platform.architecture()[0] == '32bit' 977 guest_is_32bit = self.main_object.arch.bits == 32 978 979 if python_is_32bit != guest_is_32bit: 980 redirect_dir = os.path.join(os.environ['SystemRoot'], 'system32').lower() 981 target_dir = os.path.join(os.environ['SystemRoot'], 'SysWOW64' if guest_is_32bit else 'sysnative') 982 i = 0 983 while i < len(native_dirs): 984 if native_dirs[i].lower().startswith(redirect_dir): 985 # replace the access to System32 with SysWOW64 or sysnative 986 native_dirs[i] = target_dir + native_dirs[i][len(target_dir):] 987 i += 1 988 989 dirs.extend(native_dirs) 990 991 dirs.append('.') 992 993 994 if self._case_insensitive: 995 spec = spec.lower() 996 997 for libdir in dirs: 998 if self._case_insensitive: 999 insensitive_path = self._path_insensitive(os.path.join(libdir, spec)) 1000 if insensitive_path is not None: 1001 yield os.path.realpath(insensitive_path) 1002 else: 1003 fullpath = os.path.realpath(os.path.join(libdir, spec)) 1004 if os.path.exists(fullpath): 1005 yield fullpath 1006 1007 if self._ignore_import_version_numbers: 1008 try: 1009 for libname in os.listdir(libdir): 1010 ilibname = libname.lower() if self._case_insensitive else libname 1011 if ilibname.strip('.0123456789') == spec.strip('.0123456789'): 1012 yield os.path.realpath(os.path.join(libdir, libname)) 1013 except (IOError, OSError): pass 1014 1015 @classmethod 1016 def _path_insensitive(cls, path): 1017 """ 1018 Get a case-insensitive path for use on a case sensitive system, or return None if it doesn't exist. 1019 1020 From https://stackoverflow.com/a/8462613 1021 """ 1022 if path == '' or os.path.exists(path): 1023 return path 1024 base = os.path.basename(path) # may be a directory or a file 1025 dirname = os.path.dirname(path) 1026 suffix = '' 1027 if not base: # dir ends with a slash? 1028 if len(dirname) < len(path): 1029 suffix = path[:len(path) - len(dirname)] 1030 base = os.path.basename(dirname) 1031 dirname = os.path.dirname(dirname) 1032 if not os.path.exists(dirname): 1033 dirname = cls._path_insensitive(dirname) 1034 if not dirname: 1035 return None 1036 # at this point, the directory exists but not the file 1037 try: # we are expecting dirname to be a directory, but it could be a file 1038 files = os.listdir(dirname) 1039 except OSError: 1040 return None 1041 baselow = base.lower() 1042 try: 1043 basefinal = next(fl for fl in files if fl.lower() == baselow) 1044 except StopIteration: 1045 return None 1046 if basefinal: 1047 return os.path.join(dirname, basefinal) + suffix 1048 else: 1049 return None 1050 1051 def _possible_idents(self, spec, lowercase=False): 1052 """ 1053 This iterates over all the possible identifiers that could be used to describe the given specification. 1054 """ 1055 if isinstance(spec, Backend): 1056 if spec.provides is not None: 1057 yield spec.provides 1058 if self._ignore_import_version_numbers: 1059 yield spec.provides.rstrip('.0123456789') 1060 if spec.binary: 1061 yield spec.binary 1062 yield os.path.basename(spec.binary) 1063 yield os.path.basename(spec.binary).split('.')[0] 1064 if self._ignore_import_version_numbers: 1065 yield os.path.basename(spec.binary).rstrip('.0123456789') 1066 elif hasattr(spec, 'read') and hasattr(spec, 'seek'): 1067 backend_cls = self._static_backend(spec, ignore_hints=True) 1068 if backend_cls is not None: 1069 soname = backend_cls.extract_soname(spec) 1070 if soname is not None: 1071 yield soname 1072 if self._ignore_import_version_numbers: 1073 yield soname.rstrip('.0123456789') 1074 elif type(spec) in (bytes, str): 1075 yield spec 1076 yield os.path.basename(spec) 1077 yield os.path.basename(spec).split('.')[0] 1078 if self._ignore_import_version_numbers: 1079 yield os.path.basename(spec).rstrip('.0123456789') 1080 1081 if os.path.exists(spec): 1082 backend_cls = self._static_backend(spec, ignore_hints=True) 1083 if backend_cls is not None: 1084 soname = backend_cls.extract_soname(spec) 1085 if soname is not None: 1086 yield soname 1087 if self._ignore_import_version_numbers: 1088 yield soname.rstrip('.0123456789') 1089 1090 if not lowercase and (sys.platform == 'win32' or self._case_insensitive): 1091 for name in self._possible_idents(spec, lowercase=True): 1092 yield name.lower() 1093 1094 def _static_backend(self, spec, ignore_hints=False): 1095 """ 1096 Returns the correct loader for the file at `spec`. 1097 Returns None if it's a blob or some unknown type. 1098 TODO: Implement some binwalk-like thing to carve up blobs automatically 1099 """ 1100 1101 if not ignore_hints: 1102 for ident in self._possible_idents(spec): 1103 try: 1104 return self._backend_resolver(self._lib_opts[ident]['backend']) 1105 except KeyError: 1106 pass 1107 1108 with stream_or_path(spec) as stream: 1109 for rear in ALL_BACKENDS.values(): 1110 if rear.is_default and rear.is_compatible(stream): 1111 return rear 1112 1113 return None 1114 1115 @staticmethod 1116 def _backend_resolver(backend, default=None): 1117 if isinstance(backend, type) and issubclass(backend, Backend): 1118 return backend 1119 elif backend in ALL_BACKENDS: 1120 return ALL_BACKENDS[backend] 1121 elif backend is None: 1122 return default 1123 else: 1124 raise CLEError('Invalid backend: %s' % backend) 1125 1126 1127from .errors import CLEError, CLEFileNotFoundError, CLECompatibilityError, CLEOperationError 1128from .memory import Clemory 1129from .backends import MetaELF, ELF, PE, ELFCore, Minidump, Blob, ALL_BACKENDS, Backend 1130from .backends.tls import ThreadManager, ELFThreadManager, PEThreadManager, ELFCoreThreadManager, MinidumpThreadManager, TLSObject 1131from .backends.externs import ExternObject, KernelObject 1132from .utils import stream_or_path 1133