1import os
2import sys
3import platform
4import logging
5from collections import OrderedDict
6from typing import Optional, List
7
8import archinfo
9from archinfo.arch_soot import ArchSoot
10
11from .address_translator import AT
12from .utils import ALIGN_UP, key_bisect_floor_key, key_bisect_insort_right
13
14try:
15    import claripy
16except ImportError:
17    claripy = None
18
19__all__ = ('Loader',)
20
21l = logging.getLogger(name=__name__)
22
23
24class Loader:
25    """
26    The loader loads all the objects and exports an abstraction of the memory of the process. What you see here is an
27    address space with loaded and rebased binaries.
28
29    :param main_binary:         The path to the main binary you're loading, or a file-like object with the binary
30                                in it.
31
32    The following parameters are optional.
33
34    :param auto_load_libs:      Whether to automatically load shared libraries that loaded objects depend on.
35    :param load_debug_info:     Whether to automatically parse DWARF data and search for debug symbol files.
36    :param concrete_target:     Whether to instantiate a concrete target for a concrete execution of the process.
37                                if this is the case we will need to instantiate a SimConcreteEngine that wraps the
38                                ConcreteTarget provided by the user.
39    :param force_load_libs:     A list of libraries to load regardless of if they're required by a loaded object.
40    :param skip_libs:           A list of libraries to never load, even if they're required by a loaded object.
41    :param main_opts:           A dictionary of options to be used loading the main binary.
42    :param lib_opts:            A dictionary mapping library names to the dictionaries of options to be used when
43                                loading them.
44    :param ld_path:      A list of paths in which we can search for shared libraries.
45    :param use_system_libs:     Whether or not to search the system load path for requested libraries. Default True.
46    :param ignore_import_version_numbers:
47                                Whether libraries with different version numbers in the filename will be considered
48                                equivalent, for example libc.so.6 and libc.so.0
49    :param case_insensitive:    If this is set to True, filesystem loads will be done case-insensitively regardless of
50                                the case-sensitivity of the underlying filesystem.
51    :param rebase_granularity:  The alignment to use for rebasing shared objects
52    :param except_missing_libs: Throw an exception when a shared library can't be found.
53    :param aslr:                Load libraries in symbolic address space. Do not use this option.
54    :param page_size:           The granularity with which data is mapped into memory. Set to 1 if you are working
55                                in a non-paged environment.
56    :param preload_libs:        Similar to `force_load_libs` but will provide for symbol resolution, with precedence
57                                over any dependencies.
58    :ivar memory:               The loaded, rebased, and relocated memory of the program.
59    :vartype memory:            cle.memory.Clemory
60    :ivar main_object:          The object representing the main binary (i.e., the executable).
61    :ivar shared_objects:       A dictionary mapping loaded library names to the objects representing them.
62    :ivar all_objects:          A list containing representations of all the different objects loaded.
63    :ivar requested_names:      A set containing the names of all the different shared libraries that were marked as a
64                                dependency by somebody.
65    :ivar initial_load_objects: A list of all the objects that were loaded as a result of the initial load request.
66
67    When reference is made to a dictionary of options, it requires a dictionary with zero or more of the following keys:
68
69    - backend :             "elf", "pe", "mach-o", "blob" : which loader backend to use
70    - arch :                The archinfo.Arch object to use for the binary
71    - base_addr :           The address to rebase the object at
72    - entry_point :         The entry point to use for the object
73
74    More keys are defined on a per-backend basis.
75    """
76    # _main_binary_path: str
77    memory: Optional['Clemory']
78    main_object: Optional['Backend']
79    tls: Optional['ThreadManager']
80
81    def __init__(self, main_binary, auto_load_libs=True, concrete_target = None,
82                 force_load_libs=(), skip_libs=(),
83                 main_opts=None, lib_opts=None, ld_path=(), use_system_libs=True,
84                 ignore_import_version_numbers=True, case_insensitive=False, rebase_granularity=0x100000,
85                 except_missing_libs=False, aslr=False, perform_relocations=True, load_debug_info=False,
86                 page_size=0x1, preload_libs=(), arch=None):
87        if hasattr(main_binary, 'seek') and hasattr(main_binary, 'read'):
88            self._main_binary_path = None
89            self._main_binary_stream = main_binary
90        else:
91            self._main_binary_path = os.path.realpath(str(main_binary))
92            self._main_binary_stream = None
93
94        # whether we are presently in the middle of a load cycle
95        self._juggling = False
96
97        # auto_load_libs doesn't make any sense if we have a concrete target.
98        if concrete_target:
99            auto_load_libs = False
100
101        self._auto_load_libs = auto_load_libs
102        self._load_debug_info = load_debug_info
103        self._satisfied_deps = dict((x, False) for x in skip_libs)
104        self._main_opts = {} if main_opts is None else main_opts
105        self._lib_opts = {} if lib_opts is None else lib_opts
106        self._custom_ld_path = [ld_path] if type(ld_path) is str else ld_path
107        force_load_libs = [force_load_libs] if type(force_load_libs) is str else force_load_libs
108        preload_libs = [preload_libs] if type(preload_libs) is str else preload_libs
109        self._use_system_libs = use_system_libs
110        self._ignore_import_version_numbers = ignore_import_version_numbers
111        self._case_insensitive = case_insensitive
112        self._rebase_granularity = rebase_granularity
113        self._except_missing_libs = except_missing_libs
114        self._relocated_objects = set()
115        self._perform_relocations = perform_relocations
116
117        # case insensitivity setup
118        if sys.platform == 'win32': # TODO: a real check for case insensitive filesystems
119            if self._main_binary_path: self._main_binary_path = self._main_binary_path.lower()
120            force_load_libs = [x.lower() if type(x) is str else x for x in force_load_libs]
121            for x in list(self._satisfied_deps): self._satisfied_deps[x.lower()] = self._satisfied_deps[x]
122            for x in list(self._lib_opts): self._lib_opts[x.lower()] = self._lib_opts[x]
123            self._custom_ld_path = [x.lower() for x in self._custom_ld_path]
124
125        self.aslr = aslr
126        self.page_size = page_size
127        self.memory = None
128        self.main_object = None
129        self.tls = None
130        self._kernel_object = None # type: Optional[KernelObject]
131        self._extern_object = None # type: Optional[ExternObject]
132        self.shared_objects = OrderedDict()
133        self.all_objects = []  # type: List[Backend]
134        self.requested_names = set()
135        if arch is not None:
136            self._main_opts.update({'arch': arch})
137        self.preload_libs = []
138        self.initial_load_objects = self._internal_load(main_binary, *preload_libs, *force_load_libs, preloading=(main_binary, *preload_libs))
139
140        # cache
141        self._last_object = None
142
143        if self._extern_object and self._extern_object._warned_data_import:
144            l.warning('For more information about "Symbol was allocated without a known size", see https://docs.angr.io/extending-angr/environment#simdata')
145
146    # Basic functions and properties
147
148    def close(self):
149        l.warning("You don't need to close the loader anymore :)")
150
151    def __repr__(self):
152        if self._main_binary_stream is None:
153            return '<Loaded %s, maps [%#x:%#x]>' % (os.path.basename(self._main_binary_path), self.min_addr, self.max_addr)
154        else:
155            return '<Loaded from stream, maps [%#x:%#x]>' % (self.min_addr, self.max_addr)
156
157    @property
158    def max_addr(self):
159        """
160        The maximum address loaded as part of any loaded object (i.e., the whole address space).
161        """
162        return self.all_objects[-1].max_addr
163
164    @property
165    def min_addr(self):
166        """
167        The minimum address loaded as part of any loaded object (i.e., the whole address space).
168        """
169        return self.all_objects[0].min_addr
170
171    @property
172    def initializers(self):
173        """
174        Return a list of all the initializers that should be run before execution reaches the entry point, in the order
175        they should be run.
176        """
177        return sum((x.initializers for x in self.all_objects), [])
178
179    @property
180    def finalizers(self):
181        """
182        Return a list of all the finalizers that should be run before the program exits.
183        I'm not sure what order they should be run in.
184        """
185        return sum((x.finalizers for x in self.all_objects), [])
186
187    @property
188    def linux_loader_object(self):
189        """
190        If the linux dynamic loader is present in memory, return it
191        """
192        for obj in self.all_objects:
193            if obj.provides is None:
194                continue
195            if self._is_linux_loader_name(obj.provides) is True:
196                return obj
197        return None
198
199    @property
200    def extern_object(self):
201        """
202        Return the extern object used to provide addresses to unresolved symbols and angr internals.
203
204        Accessing this property will load this object into memory if it was not previously present.
205
206        proposed model for how multiple extern objects should work:
207
208            1) extern objects are a linked list. the one in loader._extern_object is the head of the list
209            2) each round of explicit loads generates a new extern object if it has unresolved dependencies. this object
210               has exactly the size necessary to hold all its exports.
211            3) All requests for size are passed down the chain until they reach an object which has the space to service it
212               or an object which has not yet been mapped. If all objects have been mapped and are full, a new extern object
213               is mapped with a fixed size.
214        """
215        if self._extern_object is None:
216            if self.main_object.arch.bits < 32:
217                extern_size = 0x200
218            elif self.main_object.arch.bits == 32:
219                extern_size = 0x8000
220            else:
221                extern_size = 0x80000
222            self._extern_object = ExternObject(self, map_size=extern_size)
223            self._internal_load(self._extern_object)
224        return self._extern_object
225
226    @property
227    def kernel_object(self) -> 'KernelObject':
228        """
229        Return the object used to provide addresses to syscalls.
230
231        Accessing this property will load this object into memory if it was not previously present.
232        """
233        if self._kernel_object is None:
234            self._kernel_object = KernelObject(self)
235            self._map_object(self._kernel_object)
236        return self._kernel_object
237
238    @property
239    def all_elf_objects(self):
240        """
241        Return a list of every object that was loaded from an ELF file.
242        """
243        return [o for o in self.all_objects if isinstance(o, MetaELF)]
244
245    @property
246    def all_pe_objects(self):
247        """
248        Return a list of every object that was loaded from an ELF file.
249        """
250        return [o for o in self.all_objects if isinstance(o, PE)]
251
252    @property
253    def missing_dependencies(self):
254        """
255        Return a set of every name that was requested as a shared object dependency but could not be loaded
256        """
257        return self.requested_names - set(k for k,v in self._satisfied_deps.items() if v is not False)
258
259    @property
260    def auto_load_libs(self):
261        return self._auto_load_libs
262
263    def describe_addr(self, addr):
264        """
265        Returns a textual description of what's in memory at the provided address
266        """
267        o = self.find_object_containing(addr)
268
269        if o is None:
270            return 'not part of a loaded object'
271
272        options = []
273
274        rva = AT.from_va(addr, o).to_rva()
275
276        idx = o.symbols.bisect_key_right(rva) - 1
277        while idx >= 0:
278            sym = o.symbols[idx]
279            if not sym.name or sym.is_import:
280                idx -= 1
281                continue
282            options.append((sym.relative_addr, '%s+' % sym.name))
283            break
284
285        if isinstance(o, ELF):
286            try:
287                plt_addr, plt_name = max((a, n) for n, a in o._plt.items() if a <= rva)
288            except ValueError:
289                pass
290            else:
291                options.append((plt_addr, 'PLT.%s+' % plt_name))
292
293        options.append((0, 'offset '))
294
295        if o.provides:
296            objname = o.provides
297        elif o.binary:
298            objname = os.path.basename(o.binary)
299        elif self.main_object is o:
300            objname = 'main binary'
301        else:
302            objname = 'object loaded from stream'
303
304        best_offset, best_prefix = max(options, key=lambda v: v[0])
305        return '%s%#x in %s (%#x)' % (best_prefix, rva - best_offset, objname, AT.from_va(addr, o).to_lva())
306
307    # Search functions
308
309    def find_object(self, spec, extra_objects=()):
310        """
311        If the given library specification has been loaded, return its object, otherwise return None.
312        """
313        if isinstance(spec, Backend):
314            for obj in self.all_objects:
315                if obj is spec:
316                    return obj
317            return None
318
319        if self._case_insensitive:
320            spec = spec.lower()
321        extra_idents = {}
322        for obj in extra_objects:
323            for ident in self._possible_idents(obj):
324                extra_idents[ident] = obj
325
326        for ident in self._possible_idents(spec):
327            if ident in self._satisfied_deps:
328                return self._satisfied_deps[ident]
329            if ident in extra_idents:
330                return extra_idents[ident]
331
332        return None
333
334    def find_object_containing(self, addr, membership_check=True):
335        """
336        Return the object that contains the given address, or None if the address is unmapped.
337
338        :param int addr:    The address that should be contained in the object.
339        :param bool membership_check:   Whether a membership check should be performed or not (True by default). This
340                                        option can be set to False if you are certain that the target object does not
341                                        have "holes".
342        :return:            The object or None.
343        """
344
345        def _check_object_memory(obj_):
346            if isinstance(obj_.memory, Clemory):
347                if AT.from_va(addr, obj_).to_rva() in obj_.memory:
348                    self._last_object = obj_
349                    return obj_
350                return None
351            elif type(obj_.memory) is str:
352                self._last_object = obj_
353                return obj_
354            else:
355                raise CLEError('Unsupported memory type %s' % type(obj_.memory))
356
357        # check the cache first
358        if self._last_object is not None and \
359                self._last_object.min_addr <= addr <= self._last_object.max_addr:
360            if not membership_check: return self._last_object
361            if not self._last_object.has_memory: return self._last_object
362            o = _check_object_memory(self._last_object)
363            if o: return o
364
365        if addr > self.max_addr or addr < self.min_addr:
366            return None
367
368        obj = key_bisect_floor_key(self.all_objects, addr, keyfunc=lambda obj: obj.min_addr)
369        if obj is None:
370            return None
371        if not obj.min_addr <= addr <= obj.max_addr:
372            return None
373        if not membership_check:
374            self._last_object = obj
375            return obj
376        if not obj.has_memory:
377            self._last_object = obj
378            return obj
379        return _check_object_memory(obj)
380
381    def find_segment_containing(self, addr, skip_pseudo_objects=True):
382        """
383        Find the section object that the address belongs to.
384
385        :param int addr: The address to test
386        :param bool skip_pseudo_objects: Skip objects that CLE adds during loading.
387        :return: The section that the address belongs to, or None if the address does not belong to any section, or if
388                section information is not available.
389        :rtype: cle.Segment
390        """
391
392        obj = self.find_object_containing(addr, membership_check=False)
393
394        if obj is None:
395            return None
396
397        if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)):
398            # the address is from a section allocated by angr.
399            return None
400
401        return obj.find_segment_containing(addr)
402
403    def find_section_containing(self, addr, skip_pseudo_objects=True):
404        """
405        Find the section object that the address belongs to.
406
407        :param int addr: The address to test.
408        :param bool skip_pseudo_objects: Skip objects that CLE adds during loading.
409        :return: The section that the address belongs to, or None if the address does not belong to any section, or if
410                section information is not available.
411        :rtype: cle.Section
412        """
413
414        obj = self.find_object_containing(addr, membership_check=False)
415
416        if obj is None:
417            return None
418
419        if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)):
420            # the address is from a special CLE section
421            return None
422
423        return obj.find_section_containing(addr)
424
425    def find_section_next_to(self, addr, skip_pseudo_objects=True):
426        """
427        Find the next section after the given address.
428
429        :param int addr: The address to test.
430        :param bool skip_pseudo_objects: Skip objects that CLE adds during loading.
431        :return: The next section that goes after the given address, or None if there is no section after the address,
432                 or if section information is not available.
433        :rtype: cle.Section
434        """
435
436        obj = self.find_object_containing(addr, membership_check=False)
437
438        if obj is None:
439            return None
440
441        if skip_pseudo_objects and isinstance(obj, (ExternObject, KernelObject, TLSObject)):
442            # the address is from a special CLE section
443            return None
444
445        return obj.sections.find_region_next_to(addr)
446
447    def find_symbol(self, thing, fuzzy=False):
448        """
449        Search for the symbol with the given name or address.
450
451        :param thing:       Either the name or address of a symbol to look up
452        :param fuzzy:       Set to True to return the first symbol before or at the given address
453
454        :returns:           A :class:`cle.backends.Symbol` object if found, None otherwise.
455        """
456        if type(thing) is archinfo.arch_soot.SootAddressDescriptor:
457            # Soot address
458            return thing.method.fullname
459        elif type(thing) is int:
460            # address
461            if fuzzy:
462                so = self.find_object_containing(thing)
463                if so is None:
464                    return None
465                objs = [so]
466            else:
467                objs = self.all_objects
468
469            for so in objs:
470                idx = so.symbols.bisect_key_right(AT.from_mva(thing, so).to_rva()) - 1
471                while idx >= 0 and (fuzzy or so.symbols[idx].rebased_addr == thing):
472                    if so.symbols[idx].is_import:
473                        idx -= 1
474                        continue
475                    return so.symbols[idx]
476        else:
477            # name
478            for so in self.all_objects:
479                if so is self._extern_object:
480                    continue
481                sym = so.get_symbol(thing)
482                if sym is None:
483                    continue
484
485                if sym.is_import:
486                    if sym.resolvedby is not None:
487                        if sym.resolvedby.is_forward and sym.resolvedby.resolvedby is not None:
488                            return sym.resolvedby.resolvedby
489                        return sym.resolvedby
490                else:
491                    if sym.is_forward and sym.resolvedby is not None:
492                        return sym.resolvedby
493                    return sym
494
495            if self._extern_object is not None:
496                sym = self.extern_object.get_symbol(thing)
497                if sym is not None:
498                    return sym
499
500        return None
501
502    @property
503    def symbols(self):
504        peeks = []
505        for so in self.all_objects:
506            if so.symbols:
507                i = iter(so.symbols)
508                n = next(i)
509                peeks.append((n, i))
510        while peeks:
511            element = min(peeks, key=lambda x: x[0].rebased_addr) # if we don't do this it might crash on comparing iterators
512            n, i = element
513            idx = peeks.index(element)
514            yield n
515            try:
516                peeks[idx] = next(i), i
517            except StopIteration:
518                peeks.pop(idx)
519
520    def find_all_symbols(self, name, exclude_imports=True, exclude_externs=False, exclude_forwards=True):
521        """
522        Iterate over all symbols present in the set of loaded binaries that have the given name
523
524        :param name:                The name to search for
525        :param exclude_imports:     Whether to exclude import symbols. Default True.
526        :param exclude_externs:     Whether to exclude symbols in the extern object. Default False.
527        :param exclude_forwards:    Whether to exclude forward symbols. Default True.
528        """
529        for so in self.all_objects:
530            sym = so.get_symbol(name)
531            if sym is None:
532                continue
533            if sym.is_import and exclude_imports:
534                continue
535            if sym.owner is self._extern_object and exclude_externs:
536                continue
537            if sym.is_forward and exclude_forwards:
538                continue
539
540            yield sym
541
542    def find_plt_stub_name(self, addr):
543        """
544        Return the name of the PLT stub starting at ``addr``.
545        """
546        so = self.find_object_containing(addr)
547        if so is not None and isinstance(so, MetaELF):
548            return so.reverse_plt.get(addr, None)
549        return None
550
551    def find_relevant_relocations(self, name):
552        """
553        Iterate through all the relocations referring to the symbol with the given ``name``
554        """
555        for so in self.all_objects:
556            for reloc in so.relocs:
557                if reloc.symbol is not None:
558                    if reloc.symbol.name == name:
559                        yield reloc
560
561    # Complicated stuff
562
563    def perform_irelative_relocs(self, resolver_func):
564        """
565        Use this method to satisfy ``IRelative`` relocations in the binary that require execution of loaded code.
566
567        Note that this does NOT handle ``IFunc`` symbols, which must be handled separately. (this could be changed, but
568        at the moment it's desirable to support lazy IFunc resolution, since emulation is usually slow)
569
570        :param resolver_func:   A callback function that takes an address, runs the code at that address, and returns
571                                the return value from the emulated function.
572        """
573        for obj in self.all_objects:
574            for resolver, dest in obj.irelatives:
575                val = resolver_func(resolver)
576                if val is not None:
577                    obj.memory.pack_word(dest, val)
578
579    def dynamic_load(self, spec):
580        """
581        Load a file into the address space. Note that the sematics of ``auto_load_libs`` and ``except_missing_libs``
582        apply at all times.
583
584        :param spec:    The path to the file to load. May be an absolute path, a relative path, or a name to search in
585                        the load path.
586
587        :return:        A list of all the objects successfully loaded, which may be empty if this object was previously
588                        loaded. If the object specified in ``spec`` failed to load for any reason, including the file
589                        not being found, return None.
590        """
591        try:
592            return self._internal_load(spec)
593        except CLEFileNotFoundError as e:
594            l.warning("Dynamic load failed: %r", e)
595            return None
596
597    def get_loader_symbolic_constraints(self):
598        """
599        Do not use this method.
600        """
601        if not self.aslr:
602            return []
603        if not claripy:
604            l.error("Please install claripy to get symbolic constraints")
605            return []
606        outputlist = []
607        for obj in self.all_objects:
608            #TODO Fix Symbolic for tls whatever
609            if obj.aslr and isinstance(obj.mapped_base_symbolic, claripy.ast.BV):
610                outputlist.append(obj.mapped_base_symbolic == obj.mapped_base)
611        return outputlist
612
613
614    # Private stuff
615
616    @staticmethod
617    def _is_linux_loader_name(name):
618        """
619        ld can have different names such as ld-2.19.so or ld-linux-x86-64.so.2 depending on symlinks and whatnot.
620        This determines if `name` is a suitable candidate for ld.
621        """
622        return 'ld.so' in name or 'ld64.so' in name or 'ld-linux' in name
623
624    def _internal_load(self, *args, preloading=()):
625        """
626        Pass this any number of files or libraries to load. If it can't load any of them for any reason, it will
627        except out. Note that the semantics of ``auto_load_libs`` and ``except_missing_libs`` apply at all times.
628
629        It will return a list of all the objects successfully loaded, which may be smaller than the list you provided
630        if any of them were previously loaded.
631
632        The ``main_binary`` has to come first, followed by any additional libraries to load this round. To create the
633        effect of "preloading", i.e. ensuring symbols are resolved to preloaded libraries ahead of any others, pass
634        ``preloading`` as a list of identifiers which should be considered preloaded. Note that the identifiers will
635        be compared using object identity.
636        """
637        # ideal loading pipeline:
638        # - load everything, independently and recursively until dependencies are satisfied
639        # - resolve symbol-based dependencies
640        # - layout address space, including (as a prerequisite) coming up with the layout for tls and externs
641        # - map everything into memory
642        # - perform relocations
643
644        # STEP 1
645        # Load everything. for each binary, load it in isolation so we end up with a Backend instance.
646        # If auto_load_libs is on, do this iteratively until all dependencies is satisfied
647        objects = []
648        preload_objects = []
649        dependencies = []
650        cached_failures = set() # this assumes that the load path is global and immutable by the time we enter this func
651
652        for main_spec in args:
653            is_preloading = any(spec is main_spec for spec in preloading)
654            if self.find_object(main_spec, extra_objects=objects) is not None:
655                l.info("Skipping load request %s - already loaded", main_spec)
656                continue
657            obj = self._load_object_isolated(main_spec)
658            objects.append(obj)
659            objects.extend(obj.child_objects)
660            dependencies.extend(obj.deps)
661
662            if self.main_object is None:
663                # this is technically the first place we can start to initialize things based on platform
664                self.main_object = obj
665                self.memory = Clemory(obj.arch, root=True)
666
667                chk_obj = self.main_object if isinstance(self.main_object, ELFCore) or not self.main_object.child_objects else self.main_object.child_objects[0]
668                if isinstance(chk_obj, ELFCore):
669                    self.tls = ELFCoreThreadManager(self, obj.arch)
670                elif isinstance(obj, Minidump):
671                    self.tls = MinidumpThreadManager(self, obj.arch)
672                elif isinstance(chk_obj, MetaELF):
673                    self.tls = ELFThreadManager(self, obj.arch)
674                elif isinstance(chk_obj, PE):
675                    self.tls = PEThreadManager(self, obj.arch)
676                else:
677                    self.tls = ThreadManager(self, obj.arch)
678
679            elif is_preloading:
680                self.preload_libs.append(obj)
681                preload_objects.append(obj)
682
683
684        while self._auto_load_libs and dependencies:
685            spec = dependencies.pop(0)
686            if spec in cached_failures:
687                l.debug("Skipping implicit dependency %s - cached failure", spec)
688                continue
689            if self.find_object(spec, extra_objects=objects) is not None:
690                l.debug("Skipping implicit dependency %s - already loaded", spec)
691                continue
692
693            try:
694                l.info("Loading %s...", spec)
695                obj = self._load_object_isolated(spec)  # loading dependencies
696            except CLEFileNotFoundError:
697                l.info("... not found")
698                cached_failures.add(spec)
699                if self._except_missing_libs:
700                    raise
701                continue
702
703            objects.append(obj)
704            objects.extend(obj.child_objects)
705            dependencies.extend(obj.deps)
706
707            if type(self.tls) is ThreadManager:   # ... java
708                if isinstance(obj, MetaELF):
709                    self.tls = ELFThreadManager(self, obj.arch)
710                elif isinstance(obj, PE):
711                    self.tls = PEThreadManager(self, obj.arch)
712
713        # STEP 1.5
714        # produce dependency-ordered list of objects and soname map
715
716        ordered_objects = []
717        soname_mapping = OrderedDict((obj.provides if not self._ignore_import_version_numbers else obj.provides.rstrip('.0123456789'), obj) for obj in objects if obj.provides)
718        seen = set()
719        def visit(obj):
720            if id(obj) in seen:
721                return
722            seen.add(id(obj))
723
724            stripped_deps = [dep if not self._ignore_import_version_numbers else dep.rstrip('.0123456789') for dep in obj.deps]
725            dep_objs = [soname_mapping[dep_name] for dep_name in stripped_deps if dep_name in soname_mapping]
726            for dep_obj in dep_objs:
727                visit(dep_obj)
728
729            ordered_objects.append(obj)
730
731        for obj in preload_objects + objects:
732            visit(obj)
733
734        # STEP 2
735        # Resolve symbol dependencies. Create an unmapped extern object, which may not be used
736        # after this step, everything should have the appropriate references to each other and the extern
737        # object should have all the space it needs allocated
738
739        extern_obj = ExternObject(self)
740
741        # tls registration
742        for obj in objects:
743            self.tls.register_object(obj)
744
745        # link everything
746        if self._perform_relocations:
747            for obj in ordered_objects:
748                l.info("Linking %s", obj.binary)
749                sibling_objs = list(obj.parent_object.child_objects) if obj.parent_object is not None else []
750                stripped_deps = [dep if not self._ignore_import_version_numbers else dep.rstrip('.0123456789') for dep in obj.deps]
751                dep_objs = [soname_mapping[dep_name] for dep_name in stripped_deps if dep_name in soname_mapping]
752                main_objs = [self.main_object] if self.main_object is not obj else []
753                for reloc in obj.relocs:
754                    reloc.resolve_symbol(main_objs + preload_objects + sibling_objs + dep_objs + [obj], extern_object=extern_obj)
755
756        # if the extern object was used, add it to the list of objects we're mapping
757        # also add it to the linked list of extern objects
758        if extern_obj.map_size:
759            # resolve the extern relocs this way because they may produce more relocations as we go
760            i = 0
761            while i < len(extern_obj.relocs):
762                extern_obj.relocs[i].resolve_symbol(objects, extern_object=extern_obj)
763                i += 1
764
765            objects.append(extern_obj)
766            ordered_objects.insert(0, extern_obj)
767            extern_obj._next_object = self._extern_object
768            self._extern_object = extern_obj
769
770            extern_obj._finalize_tls()
771            self.tls.register_object(extern_obj)
772
773        # STEP 3
774        # Map everything to memory
775        for obj in objects:
776            self._map_object(obj)
777
778        # STEP 4
779        # Perform relocations
780        if self._perform_relocations:
781            for obj in ordered_objects:
782                obj.relocate()
783
784        # Step 5
785        # Insert each object into the appropriate mappings for lookup by name
786        for obj in objects:
787            self.requested_names.update(obj.deps)
788            for ident in self._possible_idents(obj):
789                self._satisfied_deps[ident] = obj
790
791            if obj.provides is not None:
792                self.shared_objects[obj.provides] = obj
793
794        return objects
795
796    def _load_object_isolated(self, spec):
797        """
798        Given a partial specification of a dependency, this will return the loaded object as a backend instance.
799        It will not touch any loader-global data.
800        """
801        # STEP 1: identify file
802        if isinstance(spec, Backend):
803            return spec
804        elif hasattr(spec, 'read') and hasattr(spec, 'seek'):
805            binary_stream = spec
806            binary = None
807            close = False
808        elif type(spec) in (bytes, str):
809            binary = self._search_load_path(spec) # this is allowed to cheat and do partial static loading
810            l.debug("... using full path %s", binary)
811            binary_stream = open(binary, 'rb')
812            close = True
813        else:
814            raise CLEError("Bad library specification: %s" % spec)
815
816        try:
817            # STEP 2: collect options
818            if self.main_object is None:
819                options = dict(self._main_opts)
820            else:
821                for ident in self._possible_idents(binary_stream if binary is None else binary): # also allowed to cheat
822                    if ident in self._lib_opts:
823                        options = dict(self._lib_opts[ident])
824                        break
825                else:
826                    options = {}
827
828            # STEP 3: identify backend
829            backend_spec = options.pop('backend', None)
830            backend_cls = self._backend_resolver(backend_spec)
831            if backend_cls is None:
832                backend_cls = self._static_backend(binary_stream if binary is None else binary)
833            if backend_cls is None:
834                raise CLECompatibilityError("Unable to find a loader backend for %s.  Perhaps try the 'blob' loader?" % spec)
835
836            # STEP 4: LOAD!
837            l.debug("... loading with %s", backend_cls)
838
839            result = backend_cls(binary, binary_stream, is_main_bin=self.main_object is None, loader=self, **options)
840            result.close()
841            return result
842        finally:
843            if close:
844                binary_stream.close()
845
846    def _map_object(self, obj):
847        """
848        This will integrate the object into the global address space, but will not perform relocations.
849        """
850        obj_size = obj.max_addr - obj.min_addr + 1
851
852        if obj.pic:
853            if obj._custom_base_addr is not None and self._is_range_free(obj._custom_base_addr, obj_size):
854                base_addr = obj._custom_base_addr
855            elif obj.linked_base and self._is_range_free(obj.linked_base, obj_size):
856                base_addr = obj.linked_base
857            elif not obj.is_main_bin:
858                base_addr = self._find_safe_rebase_addr(obj_size)
859            else:
860                l.warning("The main binary is a position-independent executable. "
861                          "It is being loaded with a base address of 0x400000.")
862                base_addr = 0x400000
863
864            obj.rebase(base_addr)
865        else:
866            if obj._custom_base_addr is not None and not isinstance(obj, Blob):
867                l.warning("%s: base_addr was specified but the object is not PIC. "
868                          "specify force_rebase=True to override", obj.binary_basename)
869            base_addr = obj.linked_base
870            if not self._is_range_free(obj.linked_base, obj_size):
871                raise CLEError("Position-DEPENDENT object %s cannot be loaded at %#x"% (obj.binary, base_addr))
872
873        assert obj.mapped_base >= 0
874
875        if obj.has_memory:
876            assert obj.min_addr <= obj.max_addr
877            l.info("Mapping %s at %#x", obj.binary, base_addr)
878            self.memory.add_backer(base_addr, obj.memory)
879        obj._is_mapped = True
880        key_bisect_insort_right(self.all_objects, obj, keyfunc=lambda o: o.min_addr)
881
882    # Address space management
883
884    def _find_safe_rebase_addr(self, size):
885        """
886        Return a "safe" virtual address to map an object of size ``size``, i.e. one that won't
887        overlap with anything already loaded.
888        """
889        # this assumes that self.main_object exists, which should... definitely be safe
890        if self.main_object.arch.bits < 32 or self.main_object.max_addr >= 2**(self.main_object.arch.bits-1):
891            # HACK: On small arches, we should be more aggressive in packing stuff in.
892            gap_start = 0
893        else:
894            gap_start = ALIGN_UP(self.main_object.max_addr + 1, self._rebase_granularity)
895        for o in self.all_objects:
896            if gap_start + size <= o.min_addr:
897                break
898            else:
899                gap_start = ALIGN_UP(o.max_addr + 1, self._rebase_granularity)
900
901        if gap_start + size >= 2**self.main_object.arch.bits:
902            raise CLEOperationError("Ran out of room in address space")
903
904        return gap_start
905
906    def _is_range_free(self, va, size):
907        # self.main_object should not be None here
908        if va < 0 or va + size >= 2**self.main_object.arch.bits:
909            return False
910
911        for o in self.all_objects:
912            if o.min_addr <= va <= o.max_addr or va <= o.min_addr < va + size:
913                return False
914
915        return True
916
917    # Functions of the form "use some heuristic to tell me about this spec"
918
919    def _search_load_path(self, spec):
920        """
921        This will return the most likely full path that could satisfy the given partial specification.
922
923        It will prefer files of a known filetype over files of an unknown filetype.
924        """
925        # this could be converted to being an iterator pretty easily
926        for path in self._possible_paths(spec):
927            if self.main_object is not None:
928                backend_cls = self._static_backend(path)
929                if backend_cls is None:
930                    continue
931                # If arch of main object is Soot ...
932                if isinstance(self.main_object.arch, ArchSoot):
933                    # ... skip compatibility check, since it always evaluates to false
934                    # with native libraries (which are the only valid dependencies)
935                    return path
936                if not backend_cls.check_compatibility(path, self.main_object):
937                    continue
938
939            return path
940
941        raise CLEFileNotFoundError("Could not find file %s" % spec)
942
943    def _possible_paths(self, spec):
944        """
945        This iterates through each possible path that could possibly be used to satisfy the specification.
946
947        The only check performed is whether the file exists or not.
948        """
949        dirs = []
950        dirs.extend(self._custom_ld_path)                   # if we say dirs = blah, we modify the original
951
952        if self.main_object is not None:
953            # add path of main binary
954            if self.main_object.binary is not None:
955                dirs.append(os.path.dirname(self.main_object.binary))
956            # if arch of main_object is Soot ...
957            is_arch_soot = isinstance(self.main_object.arch, ArchSoot)
958            if is_arch_soot:
959                # ... extend with load path of native libraries
960                dirs.extend(self.main_object.extra_load_path)
961                if self._use_system_libs:
962                    l.debug("Path to system libraries (usually added as dependencies of JNI libs) needs "
963                            "to be specified manually, by using the custom_ld_path option.")
964            # add path of system libraries
965            if self._use_system_libs and not is_arch_soot:
966                # Ideally this should be taken into account for each shared
967                # object, not just the main object.
968                dirs.extend(self.main_object.extra_load_path)
969                if sys.platform.startswith('linux'):
970                    dirs.extend(self.main_object.arch.library_search_path())
971                elif sys.platform == 'win32':
972                    native_dirs = os.environ['PATH'].split(';')
973
974                    # simulate the wow64 filesystem redirect, working around the fact that WE may be impacted by it as
975                    # a 32-bit python process.......
976                    python_is_32bit = platform.architecture()[0] == '32bit'
977                    guest_is_32bit = self.main_object.arch.bits == 32
978
979                    if python_is_32bit != guest_is_32bit:
980                        redirect_dir = os.path.join(os.environ['SystemRoot'], 'system32').lower()
981                        target_dir = os.path.join(os.environ['SystemRoot'], 'SysWOW64' if guest_is_32bit else 'sysnative')
982                        i = 0
983                        while i < len(native_dirs):
984                            if native_dirs[i].lower().startswith(redirect_dir):
985                                # replace the access to System32 with SysWOW64 or sysnative
986                                native_dirs[i] = target_dir + native_dirs[i][len(target_dir):]
987                            i += 1
988
989                    dirs.extend(native_dirs)
990
991        dirs.append('.')
992
993
994        if self._case_insensitive:
995            spec = spec.lower()
996
997        for libdir in dirs:
998            if self._case_insensitive:
999                insensitive_path = self._path_insensitive(os.path.join(libdir, spec))
1000                if insensitive_path is not None:
1001                    yield os.path.realpath(insensitive_path)
1002            else:
1003                fullpath = os.path.realpath(os.path.join(libdir, spec))
1004                if os.path.exists(fullpath):
1005                    yield fullpath
1006
1007            if self._ignore_import_version_numbers:
1008                try:
1009                    for libname in os.listdir(libdir):
1010                        ilibname = libname.lower() if self._case_insensitive else libname
1011                        if ilibname.strip('.0123456789') == spec.strip('.0123456789'):
1012                            yield os.path.realpath(os.path.join(libdir, libname))
1013                except (IOError, OSError): pass
1014
1015    @classmethod
1016    def _path_insensitive(cls, path):
1017        """
1018        Get a case-insensitive path for use on a case sensitive system, or return None if it doesn't exist.
1019
1020        From https://stackoverflow.com/a/8462613
1021        """
1022        if path == '' or os.path.exists(path):
1023            return path
1024        base = os.path.basename(path)  # may be a directory or a file
1025        dirname = os.path.dirname(path)
1026        suffix = ''
1027        if not base:  # dir ends with a slash?
1028            if len(dirname) < len(path):
1029                suffix = path[:len(path) - len(dirname)]
1030            base = os.path.basename(dirname)
1031            dirname = os.path.dirname(dirname)
1032        if not os.path.exists(dirname):
1033            dirname = cls._path_insensitive(dirname)
1034            if not dirname:
1035                return None
1036        # at this point, the directory exists but not the file
1037        try:  # we are expecting dirname to be a directory, but it could be a file
1038            files = os.listdir(dirname)
1039        except OSError:
1040            return None
1041        baselow = base.lower()
1042        try:
1043            basefinal = next(fl for fl in files if fl.lower() == baselow)
1044        except StopIteration:
1045            return None
1046        if basefinal:
1047            return os.path.join(dirname, basefinal) + suffix
1048        else:
1049            return None
1050
1051    def _possible_idents(self, spec, lowercase=False):
1052        """
1053        This iterates over all the possible identifiers that could be used to describe the given specification.
1054        """
1055        if isinstance(spec, Backend):
1056            if spec.provides is not None:
1057                yield spec.provides
1058                if self._ignore_import_version_numbers:
1059                    yield spec.provides.rstrip('.0123456789')
1060            if spec.binary:
1061                yield spec.binary
1062                yield os.path.basename(spec.binary)
1063                yield os.path.basename(spec.binary).split('.')[0]
1064                if self._ignore_import_version_numbers:
1065                    yield os.path.basename(spec.binary).rstrip('.0123456789')
1066        elif hasattr(spec, 'read') and hasattr(spec, 'seek'):
1067            backend_cls = self._static_backend(spec, ignore_hints=True)
1068            if backend_cls is not None:
1069                soname = backend_cls.extract_soname(spec)
1070                if soname is not None:
1071                    yield soname
1072                    if self._ignore_import_version_numbers:
1073                        yield soname.rstrip('.0123456789')
1074        elif type(spec) in (bytes, str):
1075            yield spec
1076            yield os.path.basename(spec)
1077            yield os.path.basename(spec).split('.')[0]
1078            if self._ignore_import_version_numbers:
1079                yield os.path.basename(spec).rstrip('.0123456789')
1080
1081            if os.path.exists(spec):
1082                backend_cls = self._static_backend(spec, ignore_hints=True)
1083                if backend_cls is not None:
1084                    soname = backend_cls.extract_soname(spec)
1085                    if soname is not None:
1086                        yield soname
1087                        if self._ignore_import_version_numbers:
1088                            yield soname.rstrip('.0123456789')
1089
1090        if not lowercase and (sys.platform == 'win32' or self._case_insensitive):
1091            for name in self._possible_idents(spec, lowercase=True):
1092                yield name.lower()
1093
1094    def _static_backend(self, spec, ignore_hints=False):
1095        """
1096        Returns the correct loader for the file at `spec`.
1097        Returns None if it's a blob or some unknown type.
1098        TODO: Implement some binwalk-like thing to carve up blobs automatically
1099        """
1100
1101        if not ignore_hints:
1102            for ident in self._possible_idents(spec):
1103                try:
1104                    return self._backend_resolver(self._lib_opts[ident]['backend'])
1105                except KeyError:
1106                    pass
1107
1108        with stream_or_path(spec) as stream:
1109            for rear in ALL_BACKENDS.values():
1110                if rear.is_default and rear.is_compatible(stream):
1111                    return rear
1112
1113        return None
1114
1115    @staticmethod
1116    def _backend_resolver(backend, default=None):
1117        if isinstance(backend, type) and issubclass(backend, Backend):
1118            return backend
1119        elif backend in ALL_BACKENDS:
1120            return ALL_BACKENDS[backend]
1121        elif backend is None:
1122            return default
1123        else:
1124            raise CLEError('Invalid backend: %s' % backend)
1125
1126
1127from .errors import CLEError, CLEFileNotFoundError, CLECompatibilityError, CLEOperationError
1128from .memory import Clemory
1129from .backends import MetaELF, ELF, PE, ELFCore, Minidump, Blob, ALL_BACKENDS, Backend
1130from .backends.tls import ThreadManager, ELFThreadManager, PEThreadManager, ELFCoreThreadManager, MinidumpThreadManager, TLSObject
1131from .backends.externs import ExternObject, KernelObject
1132from .utils import stream_or_path
1133