1import copy
2import os
3import archinfo
4from collections import defaultdict
5import logging
6import inspect
7from typing import Optional, Dict, Type, TYPE_CHECKING
8
9import itanium_demangler
10
11from ...sim_type import parse_cpp_file, SimTypeFunction
12from ...calling_conventions import SimCC, DEFAULT_CC
13from ...misc import autoimport
14from ...sim_type import parse_file
15from ..stubs.ReturnUnconstrained import ReturnUnconstrained
16from ..stubs.syscall_stub import syscall as stub_syscall
17
18if TYPE_CHECKING:
19    from angr.calling_conventions import SimCCSyscall
20
21
22l = logging.getLogger(name=__name__)
23SIM_LIBRARIES: Dict[str,'SimLibrary'] = {}
24
25
26class SimLibrary:
27    """
28    A SimLibrary is the mechanism for describing a dynamic library's API, its functions and metadata.
29
30    Any instance of this class (or its subclasses) found in the ``angr.procedures.definitions`` package will be
31    automatically picked up and added to ``angr.SIM_LIBRARIES`` via all its names.
32
33    :ivar fallback_cc:      A mapping from architecture to the default calling convention that should be used if no
34                            other information is present. Contains some sane defaults for linux.
35    :ivar fallback_proc:    A SimProcedure class that should be used to provide stub procedures. By default,
36                            ``ReturnUnconstrained``.
37    """
38    def __init__(self):
39        self.procedures = {}
40        self.non_returning = set()
41        self.prototypes: Dict[str,SimTypeFunction] = {}
42        self.default_ccs = {}
43        self.names = []
44        self.fallback_cc = dict(DEFAULT_CC)
45        self.fallback_proc = ReturnUnconstrained
46
47    def copy(self):
48        """
49        Make a copy of this SimLibrary, allowing it to be mutated without affecting the global version.
50
51        :return:    A new SimLibrary object with the same library references but different dict/list references
52        """
53        o = SimLibrary()
54        o.procedures = dict(self.procedures)
55        o.non_returning = set(self.non_returning)
56        o.prototypes = dict(self.prototypes)
57        o.default_ccs = dict(self.default_ccs)
58        o.names = list(self.names)
59        return o
60
61    def update(self, other):
62        """
63        Augment this SimLibrary with the information from another SimLibrary
64
65        :param other:   The other SimLibrary
66        """
67        self.procedures.update(other.procedures)
68        self.non_returning.update(other.non_returning)
69        self.prototypes.update(other.prototypes)
70        self.default_ccs.update(other.default_ccs)
71
72    @property
73    def name(self):
74        """
75        The first common name of this library, e.g. libc.so.6, or '??????' if none are known.
76        """
77        return self.names[0] if self.names else '??????'
78
79    def set_library_names(self, *names):
80        """
81        Set some common names of this library by which it may be referred during linking
82
83        :param names:   Any number of string library names may be passed as varargs.
84        """
85        for name in names:
86            self.names.append(name)
87            SIM_LIBRARIES[name] = self
88
89    def set_default_cc(self, arch_name, cc_cls):
90        """
91        Set the default calling convention used for this library under a given architecture
92
93        :param arch_name:   The string name of the architecture, i.e. the ``.name`` field from archinfo.
94        :parm cc_cls:       The SimCC class (not an instance!) to use
95        """
96        arch_name = archinfo.arch_from_id(arch_name).name
97        self.default_ccs[arch_name] = cc_cls
98
99    def set_non_returning(self, *names):
100        """
101        Mark some functions in this class as never returning, i.e. loops forever or terminates execution
102
103        :param names:   Any number of string function names may be passed as varargs
104        """
105        for name in names:
106            self.non_returning.add(name)
107
108    def set_prototype(self, name, proto):
109        """
110        Set the prototype of a function in the form of a SimTypeFunction containing argument and return types
111
112        :param name:    The name of the function as a string
113        :param proto:   The prototype of the function as a SimTypeFunction
114        """
115        self.prototypes[name] = proto
116
117    def set_prototypes(self, protos):
118        """
119        Set the prototypes of many functions
120
121        :param protos:   Dictionary mapping function names to SimTypeFunction objects
122        """
123        self.prototypes.update(protos)
124
125    def set_c_prototype(self, c_decl):
126        """
127        Set the prototype of a function in the form of a C-style function declaration.
128
129        :param str c_decl: The C-style declaration of the function.
130        :return:           A tuple of (function name, function prototype)
131        :rtype:            tuple
132        """
133
134        parsed = parse_file(c_decl)
135        parsed_decl = parsed[0]
136        if not parsed_decl:
137            raise ValueError('Cannot parse the function prototype.')
138        func_name, func_proto = next(iter(parsed_decl.items()))
139
140        self.set_prototype(func_name, func_proto)
141
142        return func_name, func_proto
143
144    def add(self, name, proc_cls, **kwargs):
145        """
146        Add a function implementation fo the library.
147
148        :param name:        The name of the function as a string
149        :param proc_cls:    The implementation of the function as a SimProcedure _class_, not instance
150        :param kwargs:      Any additional parameters to the procedure class constructor may be passed as kwargs
151        """
152        self.procedures[name] = proc_cls(display_name=name, **kwargs)
153
154    def add_all_from_dict(self, dictionary, **kwargs):
155        """
156        Batch-add function implementations to the library.
157
158        :param dictionary:  A mapping from name to procedure class, i.e. the first two arguments to add()
159        :param kwargs:      Any additional kwargs will be passed to the constructors of _each_ procedure class
160        """
161        for name, procedure in dictionary.items():
162            self.add(name, procedure, **kwargs)
163
164    def add_alias(self, name, *alt_names):
165        """
166        Add some duplicate names for a given function. The original function's implementation must already be
167        registered.
168
169        :param name:        The name of the function for which an implementation is already present
170        :param alt_names:   Any number of alternate names may be passed as varargs
171        """
172        old_procedure = self.procedures[name]
173        for alt in alt_names:
174            new_procedure = copy.deepcopy(old_procedure)
175            new_procedure.display_name = alt
176            self.procedures[alt] = new_procedure
177
178    def _apply_metadata(self, proc, arch):
179        if proc.cc is None and arch.name in self.default_ccs:
180            proc.cc = self.default_ccs[arch.name](arch)
181            if proc.cc.func_ty is not None:
182                # Use inspect to extract the parameters from the run python function
183                proc.cc.func_ty.arg_names = inspect.getfullargspec(proc.run).args[1:]
184        if proc.cc is None and arch.name in self.fallback_cc:
185            proc.cc = self.fallback_cc[arch.name](arch)
186        if proc.display_name in self.prototypes:
187            proc.cc.func_ty = self.prototypes[proc.display_name].with_arch(arch)
188            # Use inspect to extract the parameters from the run python function
189            proc.cc.func_ty.arg_names = inspect.getfullargspec(proc.run).args[1:]
190            if not proc.ARGS_MISMATCH:
191                proc.cc.num_args = len(proc.cc.func_ty.args)
192                proc.num_args = len(proc.cc.func_ty.args)
193        if proc.display_name in self.non_returning:
194            proc.returns = False
195        proc.library_name = self.name
196
197    def get(self, name, arch):
198        """
199        Get an implementation of the given function specialized for the given arch, or a stub procedure if none exists.
200
201        :param name:    The name of the function as a string
202        :param arch:    The architecure to use, as either a string or an archinfo.Arch instance
203        :return:        A SimProcedure instance representing the function as found in the library
204        """
205        if type(arch) is str:
206            arch = archinfo.arch_from_id(arch)
207        if name in self.procedures:
208            proc = copy.deepcopy(self.procedures[name])
209            self._apply_metadata(proc, arch)
210            return proc
211        else:
212            return self.get_stub(name, arch)
213
214    def get_stub(self, name, arch):
215        """
216        Get a stub procedure for the given function, regardless of if a real implementation is available. This will
217        apply any metadata, such as a default calling convention or a function prototype.
218
219        By stub, we pretty much always mean a ``ReturnUnconstrained`` SimProcedure with the appropriate display name
220        and metadata set. This will appear in ``state.history.descriptions`` as ``<SimProcedure display_name (stub)>``
221
222        :param name:    The name of the function as a string
223        :param arch:    The architecture to use, as either a string or an archinfo.Arch instance
224        :return:        A SimProcedure instance representing a plausable stub as could be found in the library.
225        """
226        proc = self.fallback_proc(display_name=name, is_stub=True)
227        self._apply_metadata(proc, arch)
228        return proc
229
230    def get_prototype(self, name: str, arch=None) -> Optional[SimTypeFunction]:
231        """
232        Get a prototype of the given function name, optionally specialize the prototype to a given architecture.
233
234        :param name:    Name of the function.
235        :param arch:    The architecture to specialize to.
236        :return:        Prototype of the function, or None if the prototype does not exist.
237        """
238        proto = self.prototypes.get(name, None)
239        if proto is None:
240            return None
241        if arch is not None:
242            return proto.with_arch(arch)
243        return proto
244
245    def has_metadata(self, name):
246        """
247        Check if a function has either an implementation or any metadata associated with it
248
249        :param name:    The name of the function as a string
250        :return:        A bool indicating if anything is known about the function
251        """
252        return self.has_implementation(name) or \
253            name in self.non_returning or \
254            name in self.prototypes
255
256    def has_implementation(self, name):
257        """
258        Check if a function has an implementation associated with it
259
260        :param name:    The name of the function as a string
261        :return:        A bool indicating if an implementation of the function is available
262        """
263        return name in self.procedures
264
265    def has_prototype(self, func_name):
266        """
267        Check if a function has a prototype associated with it.
268
269        :param str func_name: The name of the function.
270        :return:              A bool indicating if a prototype of the function is available.
271        :rtype:               bool
272        """
273
274        return func_name in self.prototypes
275
276
277class SimCppLibrary(SimLibrary):
278    """
279    SimCppLibrary is a specialized version of SimLibrary that will demangle C++ function names before looking for an
280    implementation or prototype for it.
281    """
282
283    @staticmethod
284    def _try_demangle(name):
285        if name[0:2] == "_Z":
286            try:
287                ast = itanium_demangler.parse(name)
288            except NotImplementedError:
289                return name
290            if ast:
291                return str(ast)
292        return name
293
294    @staticmethod
295    def _proto_from_demangled_name(name: str) -> Optional[SimCC]:
296        """
297        Attempt to extract arguments and calling convention information for a C++ function whose name was mangled
298        according to the Itanium C++ ABI symbol mangling language.
299
300        :param name:    The demangled function name.
301        :return:        A calling convention or None if a calling convention cannot be found.
302        """
303
304        try:
305            parsed, _ = parse_cpp_file(name, with_param_names=False)
306        except ValueError:
307            return None
308        if not parsed:
309            return None
310        _, func_proto = next(iter(parsed.items()))
311        return func_proto
312
313    def get(self, name, arch):
314        """
315        Get an implementation of the given function specialized for the given arch, or a stub procedure if none exists.
316        Demangle the function name if it is a mangled C++ name.
317
318        :param str name:    The name of the function as a string
319        :param arch:    The architecure to use, as either a string or an archinfo.Arch instance
320        :return:        A SimProcedure instance representing the function as found in the library
321        """
322        demangled_name = self._try_demangle(name)
323        if demangled_name not in self.procedures:
324            return self.get_stub(name, arch)  # get_stub() might use the mangled name to derive the function prototype
325        return super().get(demangled_name, arch)
326
327    def get_stub(self, name, arch):
328        """
329        Get a stub procedure for the given function, regardless of if a real implementation is available. This will
330        apply any metadata, such as a default calling convention or a function prototype. Demangle the function name
331        if it is a mangled C++ name.
332
333        :param str name:    The name of the function as a string
334        :param arch:        The architecture to use, as either a string or an archinfo.Arch instance
335        :return:            A SimProcedure instance representing a plausable stub as could be found in the library.
336        """
337        demangled_name = self._try_demangle(name)
338        stub = super().get_stub(demangled_name, arch)
339        # try to determine a prototype from the function name if possible
340        if demangled_name != name:
341            # itanium-mangled function name
342            stub.cc.set_func_type_with_arch(self._proto_from_demangled_name(demangled_name))
343            if stub.cc.func_ty is not None and not stub.ARGS_MISMATCH:
344                stub.cc.num_args = len(stub.cc.func_ty.args)
345                stub.num_args = len(stub.cc.func_ty.args)
346        return stub
347
348    def get_prototype(self, name: str, arch=None) -> Optional[SimTypeFunction]:
349        """
350        Get a prototype of the given function name, optionally specialize the prototype to a given architecture. The
351        function name will be demangled first.
352
353        :param name:    Name of the function.
354        :param arch:    The architecture to specialize to.
355        :return:        Prototype of the function, or None if the prototype does not exist.
356        """
357        demangled_name = self._try_demangle(name)
358        return super().get_prototype(demangled_name, arch=arch)
359
360    def has_metadata(self, name):
361        """
362        Check if a function has either an implementation or any metadata associated with it. Demangle the function name
363        if it is a mangled C++ name.
364
365        :param name:    The name of the function as a string
366        :return:        A bool indicating if anything is known about the function
367        """
368        name = self._try_demangle(name)
369        return super().has_metadata(name)
370
371    def has_implementation(self, name):
372        """
373        Check if a function has an implementation associated with it. Demangle the function name if it is a mangled C++
374        name.
375
376        :param str name:    A mangled function name.
377        :return:            bool
378        """
379        return super().has_implementation(self._try_demangle(name))
380
381    def has_prototype(self, func_name):
382        """
383        Check if a function has a prototype associated with it. Demangle the function name if it is a mangled C++ name.
384
385        :param str name:    A mangled function name.
386        :return:            bool
387        """
388        return super().has_prototype(self._try_demangle(func_name))
389
390
391class SimSyscallLibrary(SimLibrary):
392    """
393    SimSyscallLibrary is a specialized version of SimLibrary for dealing not with a dynamic library's API but rather
394    an operating system's syscall API. Because this interface is inherently lower-level than a dynamic library, many
395    parts of this class has been changed to store data based on an "ABI name" (ABI = application binary interface,
396    like an API but for when there's no programming language) instead of an architecture. An ABI name is just an
397    arbitrary string with which a calling convention and a syscall numbering is associated.
398
399    All the SimLibrary methods for adding functions still work, but now there's an additional layer on top that
400    associates them with numbers.
401    """
402    def __init__(self):
403        super().__init__()
404        self.syscall_number_mapping: Dict[str,Dict[int,str]] = defaultdict(dict)  # keyed by abi
405        self.syscall_name_mapping: Dict[str,Dict[str,int]] = defaultdict(dict)  # keyed by abi
406        self.default_cc_mapping: Dict[str,Type['SimCCSyscall']] = {}  # keyed by abi
407        self.syscall_prototypes: Dict[str,Dict[str,SimTypeFunction]] = defaultdict(dict)  # keyed by abi
408        self.fallback_proc = stub_syscall
409
410    def copy(self):
411        o = SimSyscallLibrary()
412        o.procedures = dict(self.procedures)
413        o.non_returning = set(self.non_returning)
414        o.prototypes = dict(self.prototypes)
415        o.default_ccs = dict(self.default_ccs)
416        o.names = list(self.names)
417        o.syscall_number_mapping = defaultdict(dict, self.syscall_number_mapping) # {abi: {number: name}}
418        o.syscall_name_mapping = defaultdict(dict, self.syscall_name_mapping) # {abi: {name: number}}
419        o.syscall_prototypes = defaultdict(dict, self.syscall_prototypes) # as above
420        o.default_cc_mapping = dict(self.default_cc_mapping) # {abi: cc}
421        return o
422
423    def update(self, other):
424        super().update(other)
425        self.syscall_number_mapping.update(other.syscall_number_mapping)
426        self.syscall_name_mapping.update(other.syscall_name_mapping)
427        self.default_cc_mapping.update(other.default_cc_mapping)
428
429    def minimum_syscall_number(self, abi):
430        """
431        :param abi: The abi to evaluate
432        :return:    The smallest syscall number known for the given abi
433        """
434        if abi not in self.syscall_number_mapping or \
435                not self.syscall_number_mapping[abi]:
436            return 0
437        return min(self.syscall_number_mapping[abi])
438
439    def maximum_syscall_number(self, abi):
440        """
441        :param abi: The abi to evaluate
442        :return:    The largest syscall number known for the given abi
443        """
444        if abi not in self.syscall_number_mapping or \
445                not self.syscall_number_mapping[abi]:
446            return 0
447        return max(self.syscall_number_mapping[abi])
448
449    def add_number_mapping(self, abi, number, name):
450        """
451        Associate a syscall number with the name of a function present in the underlying SimLibrary
452
453        :param abi:     The abi for which this mapping applies
454        :param number:  The syscall number
455        :param name:    The name of the function
456        """
457        self.syscall_number_mapping[abi][number] = name
458        self.syscall_name_mapping[abi][name] = number
459
460    def add_number_mapping_from_dict(self, abi, mapping):
461        """
462        Batch-associate syscall numbers with names of functions present in the underlying SimLibrary
463
464        :param abi:     The abi for which this mapping applies
465        :param mapping: A dict mapping syscall numbers to function names
466        """
467        self.syscall_number_mapping[abi].update(mapping)
468        self.syscall_name_mapping[abi].update(dict(reversed(i) for i in mapping.items()))
469
470    def set_abi_cc(self, abi, cc_cls):
471        """
472        Set the default calling convention for an abi
473
474        :param abi:     The name of the abi
475        :param cc_cls:  A SimCC _class_, not an instance, that should be used for syscalls using the abi
476        """
477        self.default_cc_mapping[abi] = cc_cls
478
479    def set_prototype(self, abi: str, name: str, proto: SimTypeFunction) -> None:  # pylint: disable=arguments-differ
480        """
481        Set the prototype of a function in the form of a SimTypeFunction containing argument and return types
482
483        :param abi:     ABI of the syscall.
484        :param name:    The name of the syscall as a string
485        :param proto:   The prototype of the syscall as a SimTypeFunction
486        """
487        self.syscall_prototypes[abi][name] = proto
488
489    def set_prototypes(self, abi: str, protos: Dict[str,SimTypeFunction]) -> None:  # pylint: disable=arguments-differ
490        """
491        Set the prototypes of many syscalls.
492
493        :param abi:     ABI of the syscalls.
494        :param protos:  Dictionary mapping syscall names to SimTypeFunction objects
495        """
496        self.syscall_prototypes[abi].update(protos)
497
498    def _canonicalize(self, number, arch, abi_list):
499        if type(arch) is str:
500            arch = archinfo.arch_from_id(arch)
501        if type(number) is str:
502            return number, arch, None
503        for abi in abi_list:
504            mapping = self.syscall_number_mapping[abi]
505            if number in mapping:
506                return mapping[number], arch, abi
507        return 'sys_%d' % number, arch, None
508
509    def _apply_numerical_metadata(self, proc, number, arch, abi):
510        proc.syscall_number = number
511        proc.abi = abi
512        if abi in self.default_cc_mapping:
513            cc = self.default_cc_mapping[abi](arch)
514            if proc.cc is not None:
515                cc.set_func_type_with_arch(proc.cc.func_ty)
516            proc.cc = cc
517        # a bit of a hack.
518        name = proc.display_name
519        if self.syscall_prototypes[abi].get(name, None) is not None and proc.cc is not None:
520            proc.cc.func_ty = self.syscall_prototypes[abi][name].with_arch(arch)
521
522    # pylint: disable=arguments-differ
523    def get(self, number, arch, abi_list=()):
524        """
525        The get() function for SimSyscallLibrary looks a little different from its original version.
526
527        Instead of providing a name, you provide a number, and you additionally provide a list of abi names that are
528        applicable. The first abi for which the number is present in the mapping will be chosen. This allows for the
529        easy abstractions of architectures like ARM or MIPS linux for which there are many ABIs that can be used at any
530        time by using syscall numbers from various ranges. If no abi knows about the number, the stub procedure with
531        the name "sys_%d" will be used.
532
533        :param number:      The syscall number
534        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
535        :param abi_list:    A list of ABI names that could be used
536        :return:            A SimProcedure representing the implementation of the given syscall, or a stub if no
537                            implementation is available
538        """
539        name, arch, abi = self._canonicalize(number, arch, abi_list)
540        proc = super().get(name, arch)
541        proc.is_syscall = True
542        self._apply_numerical_metadata(proc, number, arch, abi)
543        return proc
544
545    def get_stub(self, number, arch, abi_list=()):
546        """
547        Pretty much the intersection of SimLibrary.get_stub() and SimSyscallLibrary.get().
548
549        :param number:      The syscall number
550        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
551        :param abi_list:    A list of ABI names that could be used
552        :return:            A SimProcedure representing a plausable stub that could model the syscall
553        """
554        name, arch, abi = self._canonicalize(number, arch, abi_list)
555        proc = super().get_stub(name, arch)
556        self._apply_numerical_metadata(proc, number, arch, abi)
557        l.debug("unsupported syscall: %s", number)
558        return proc
559
560    def get_prototype(self, abi: str, name: str, arch=None) -> Optional[SimTypeFunction]:
561        """
562        Get a prototype of the given syscall name and its ABI, optionally specialize the prototype to a given
563        architecture.
564
565        :param abi:     ABI of the prototype to get.
566        :param name:    Name of the syscall.
567        :param arch:    The architecture to specialize to.
568        :return:        Prototype of the syscall, or None if the prototype does not exist.
569        """
570        if abi not in self.syscall_prototypes:
571            return None
572        proto = self.syscall_prototypes[abi].get(name, None)
573        if proto is None:
574            return None
575        return proto.with_arch(arch=arch)
576
577    def has_metadata(self, number, arch, abi_list=()):
578        """
579        Pretty much the intersection of SimLibrary.has_metadata() and SimSyscallLibrary.get().
580
581        :param number:      The syscall number
582        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
583        :param abi_list:    A list of ABI names that could be used
584        :return:            A bool of whether or not any implementation or metadata is known about the given syscall
585        """
586        name, _, _ = self._canonicalize(number, arch, abi_list)
587        return super().has_metadata(name)
588
589    def has_implementation(self, number, arch, abi_list=()):
590        """
591        Pretty much the intersection of SimLibrary.has_implementation() and SimSyscallLibrary.get().
592
593        :param number:      The syscall number
594        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
595        :param abi_list:    A list of ABI names that could be used
596        :return:            A bool of whether or not an implementation of the syscall is available
597        """
598        name, _, _ = self._canonicalize(number, arch, abi_list)
599        return super().has_implementation(name)
600
601    def has_prototype(self, abi: str, name: str) -> bool:
602        """
603        Check if a function has a prototype associated with it. Demangle the function name if it is a mangled C++ name.
604
605        :param abi:         Name of the ABI.
606        :param name:        The syscall name.
607        :return:            bool
608        """
609        if abi not in self.syscall_prototypes:
610            return False
611        return name in self.syscall_prototypes[abi]
612
613
614for _ in autoimport.auto_import_modules('angr.procedures.definitions', os.path.dirname(os.path.realpath(__file__))):
615    pass
616