import copy
import os
import archinfo
from collections import defaultdict
import logging
import inspect
from typing import Optional, Dict, Type, TYPE_CHECKING

import itanium_demangler

from ...sim_type import parse_cpp_file, SimTypeFunction
from ...calling_conventions import SimCC, DEFAULT_CC
from ...misc import autoimport
from ...sim_type import parse_file
from ..stubs.ReturnUnconstrained import ReturnUnconstrained
from ..stubs.syscall_stub import syscall as stub_syscall

if TYPE_CHECKING:
    from angr.calling_conventions import SimCCSyscall


l = logging.getLogger(name=__name__)
SIM_LIBRARIES: Dict[str, 'SimLibrary'] = {}


class SimLibrary:
    """
    A SimLibrary is the mechanism for describing a dynamic library's API, its functions and metadata.

    Any instance of this class (or its subclasses) found in the ``angr.procedures.definitions`` package will be
    automatically picked up and added to ``angr.SIM_LIBRARIES`` via all its names.

    :ivar fallback_cc:      A mapping from architecture to the default calling convention that should be used if no
                            other information is present. Contains some sane defaults for linux.
    :ivar fallback_proc:    A SimProcedure class that should be used to provide stub procedures. By default,
                            ``ReturnUnconstrained``.
    """
    def __init__(self):
        self.procedures = {}
        self.non_returning = set()
        self.prototypes: Dict[str, SimTypeFunction] = {}
        self.default_ccs = {}
        self.names = []
        self.fallback_cc = dict(DEFAULT_CC)
        self.fallback_proc = ReturnUnconstrained

    def _copy_base_state_to(self, o):
        """
        Copy the state owned by SimLibrary into ``o``, giving ``o`` its own containers.

        :param o:   A freshly constructed SimLibrary (or subclass) instance to populate.
        :return:    ``o``, for convenience.
        """
        o.procedures = dict(self.procedures)
        o.non_returning = set(self.non_returning)
        o.prototypes = dict(self.prototypes)
        o.default_ccs = dict(self.default_ccs)
        o.names = list(self.names)
        # carry over customised fallbacks too, otherwise the copy silently reverts to the constructor defaults
        o.fallback_cc = dict(self.fallback_cc)
        o.fallback_proc = self.fallback_proc
        return o

    def copy(self):
        """
        Make a copy of this SimLibrary, allowing it to be mutated without affecting the global version.

        :return:    A new SimLibrary object with the same library references but different dict/list references
        """
        return self._copy_base_state_to(SimLibrary())

    def update(self, other):
        """
        Augment this SimLibrary with the information from another SimLibrary

        :param other:   The other SimLibrary
        """
        self.procedures.update(other.procedures)
        self.non_returning.update(other.non_returning)
        self.prototypes.update(other.prototypes)
        self.default_ccs.update(other.default_ccs)

    @property
    def name(self):
        """
        The first common name of this library, e.g. libc.so.6, or '??????' if none are known.
        """
        return self.names[0] if self.names else '??????'

    def set_library_names(self, *names):
        """
        Set some common names of this library by which it may be referred during linking

        :param names:   Any number of string library names may be passed as varargs.
        """
        for name in names:
            self.names.append(name)
            SIM_LIBRARIES[name] = self

    def set_default_cc(self, arch_name, cc_cls):
        """
        Set the default calling convention used for this library under a given architecture

        :param arch_name:   The string name of the architecture, i.e. the ``.name`` field from archinfo.
        :param cc_cls:      The SimCC class (not an instance!) to use
        """
        # normalise whatever identifier was given to the canonical archinfo name used as the lookup key
        arch_name = archinfo.arch_from_id(arch_name).name
        self.default_ccs[arch_name] = cc_cls

    def set_non_returning(self, *names):
        """
        Mark some functions in this class as never returning, i.e. loops forever or terminates execution

        :param names:   Any number of string function names may be passed as varargs
        """
        self.non_returning.update(names)

    def set_prototype(self, name, proto):
        """
        Set the prototype of a function in the form of a SimTypeFunction containing argument and return types

        :param name:    The name of the function as a string
        :param proto:   The prototype of the function as a SimTypeFunction
        """
        self.prototypes[name] = proto

    def set_prototypes(self, protos):
        """
        Set the prototypes of many functions

        :param protos:   Dictionary mapping function names to SimTypeFunction objects
        """
        self.prototypes.update(protos)

    def set_c_prototype(self, c_decl):
        """
        Set the prototype of a function in the form of a C-style function declaration.

        :param str c_decl: The C-style declaration of the function.
        :return:           A tuple of (function name, function prototype)
        :rtype:            tuple
        :raises ValueError: If no declaration could be parsed out of ``c_decl``.
        """
        parsed_decl = parse_file(c_decl)[0]
        if not parsed_decl:
            raise ValueError('Cannot parse the function prototype.')
        # only the first parsed declaration is used
        func_name, func_proto = next(iter(parsed_decl.items()))

        self.set_prototype(func_name, func_proto)

        return func_name, func_proto

    def add(self, name, proc_cls, **kwargs):
        """
        Add a function implementation to the library.

        :param name:        The name of the function as a string
        :param proc_cls:    The implementation of the function as a SimProcedure _class_, not instance
        :param kwargs:      Any additional parameters to the procedure class constructor may be passed as kwargs
        """
        self.procedures[name] = proc_cls(display_name=name, **kwargs)

    def add_all_from_dict(self, dictionary, **kwargs):
        """
        Batch-add function implementations to the library.

        :param dictionary:  A mapping from name to procedure class, i.e. the first two arguments to add()
        :param kwargs:      Any additional kwargs will be passed to the constructors of _each_ procedure class
        """
        for name, procedure in dictionary.items():
            self.add(name, procedure, **kwargs)

    def add_alias(self, name, *alt_names):
        """
        Add some duplicate names for a given function. The original function's implementation must already be
        registered.

        :param name:        The name of the function for which an implementation is already present
        :param alt_names:   Any number of alternate names may be passed as varargs
        :raises KeyError:   If no implementation is registered under ``name``.
        """
        old_procedure = self.procedures[name]
        for alt in alt_names:
            # each alias gets its own instance so that display_name can differ
            new_procedure = copy.deepcopy(old_procedure)
            new_procedure.display_name = alt
            self.procedures[alt] = new_procedure

    def _apply_metadata(self, proc, arch):
        """
        Fill in the calling convention, prototype, returning-ness and library name of a procedure instance.

        :param proc:    The SimProcedure instance to update in place
        :param arch:    The archinfo.Arch instance the procedure is being specialized for
        """
        if proc.cc is None and arch.name in self.default_ccs:
            proc.cc = self.default_ccs[arch.name](arch)
            if proc.cc.func_ty is not None:
                # Use inspect to extract the parameters from the run python function
                proc.cc.func_ty.arg_names = inspect.getfullargspec(proc.run).args[1:]
        if proc.cc is None and arch.name in self.fallback_cc:
            proc.cc = self.fallback_cc[arch.name](arch)
        if proc.display_name in self.prototypes:
            if proc.cc is None:
                # no calling convention is known for this architecture, so there is nothing to attach the
                # prototype to
                l.debug("No calling convention available for %s on %s; prototype not applied",
                        proc.display_name, arch.name)
            else:
                proc.cc.func_ty = self.prototypes[proc.display_name].with_arch(arch)
                # Use inspect to extract the parameters from the run python function
                proc.cc.func_ty.arg_names = inspect.getfullargspec(proc.run).args[1:]
                if not proc.ARGS_MISMATCH:
                    num_args = len(proc.cc.func_ty.args)
                    proc.cc.num_args = num_args
                    proc.num_args = num_args
        if proc.display_name in self.non_returning:
            proc.returns = False
        proc.library_name = self.name

    def get(self, name, arch):
        """
        Get an implementation of the given function specialized for the given arch, or a stub procedure if none exists.

        :param name:    The name of the function as a string
        :param arch:    The architecture to use, as either a string or an archinfo.Arch instance
        :return:        A SimProcedure instance representing the function as found in the library
        """
        if isinstance(arch, str):
            arch = archinfo.arch_from_id(arch)
        if name not in self.procedures:
            return self.get_stub(name, arch)
        # hand out a private copy so per-arch metadata does not leak into the registered instance
        proc = copy.deepcopy(self.procedures[name])
        self._apply_metadata(proc, arch)
        return proc

    def get_stub(self, name, arch):
        """
        Get a stub procedure for the given function, regardless of if a real implementation is available. This will
        apply any metadata, such as a default calling convention or a function prototype.

        By stub, we pretty much always mean a ``ReturnUnconstrained`` SimProcedure with the appropriate display name
        and metadata set. This will appear in ``state.history.descriptions`` as ``<SimProcedure display_name (stub)>``

        :param name:    The name of the function as a string
        :param arch:    The architecture to use, as either a string or an archinfo.Arch instance
        :return:        A SimProcedure instance representing a plausible stub as could be found in the library.
        """
        # _apply_metadata needs an Arch object, so accept the string form here just like get() does
        if isinstance(arch, str):
            arch = archinfo.arch_from_id(arch)
        proc = self.fallback_proc(display_name=name, is_stub=True)
        self._apply_metadata(proc, arch)
        return proc

    def get_prototype(self, name: str, arch=None) -> Optional[SimTypeFunction]:
        """
        Get a prototype of the given function name, optionally specialize the prototype to a given architecture.

        :param name:    Name of the function.
        :param arch:    The architecture to specialize to.
        :return:        Prototype of the function, or None if the prototype does not exist.
        """
        proto = self.prototypes.get(name, None)
        if proto is None:
            return None
        if arch is not None:
            return proto.with_arch(arch)
        return proto

    def has_metadata(self, name):
        """
        Check if a function has either an implementation or any metadata associated with it

        :param name:    The name of the function as a string
        :return:        A bool indicating if anything is known about the function
        """
        # Look at self.procedures directly instead of calling self.has_implementation(): subclasses override
        # has_implementation() with a different signature and reach this method through super().
        return name in self.procedures or \
            name in self.non_returning or \
            name in self.prototypes

    def has_implementation(self, name):
        """
        Check if a function has an implementation associated with it

        :param name:    The name of the function as a string
        :return:        A bool indicating if an implementation of the function is available
        """
        return name in self.procedures

    def has_prototype(self, func_name):
        """
        Check if a function has a prototype associated with it.

        :param str func_name: The name of the function.
        :return:              A bool indicating if a prototype of the function is available.
        :rtype:               bool
        """
        return func_name in self.prototypes


class SimCppLibrary(SimLibrary):
    """
    SimCppLibrary is a specialized version of SimLibrary that will demangle C++ function names before looking for an
    implementation or prototype for it.
    """

    def copy(self):
        """
        Make a copy of this SimCppLibrary, allowing it to be mutated without affecting the global version.

        :return:    A new SimCppLibrary object with the same library references but different dict/list references
        """
        return self._copy_base_state_to(SimCppLibrary())

    @staticmethod
    def _try_demangle(name):
        """
        Demangle ``name`` if it looks like an Itanium-mangled symbol, otherwise return it unchanged.

        :param str name:    A possibly mangled function name.
        :return:            The demangled name, or ``name`` itself if it is not mangled or cannot be demangled.
        """
        if not name.startswith("_Z"):
            return name
        try:
            ast = itanium_demangler.parse(name)
        except NotImplementedError:
            return name
        return str(ast) if ast else name

    @staticmethod
    def _proto_from_demangled_name(name: str) -> Optional[SimTypeFunction]:
        """
        Attempt to derive a function prototype from the demangled name of a C++ function whose name was mangled
        according to the Itanium C++ ABI symbol mangling language.

        :param name:    The demangled function name.
        :return:        The first function prototype parsed out of the name, or None if nothing could be parsed.
        """
        try:
            parsed, _ = parse_cpp_file(name, with_param_names=False)
        except ValueError:
            return None
        if not parsed:
            return None
        _, func_proto = next(iter(parsed.items()))
        return func_proto

    def get(self, name, arch):
        """
        Get an implementation of the given function specialized for the given arch, or a stub procedure if none exists.
        Demangle the function name if it is a mangled C++ name.

        :param str name:    The name of the function as a string
        :param arch:        The architecture to use, as either a string or an archinfo.Arch instance
        :return:            A SimProcedure instance representing the function as found in the library
        """
        demangled_name = self._try_demangle(name)
        if demangled_name not in self.procedures:
            return self.get_stub(name, arch)  # get_stub() might use the mangled name to derive the function prototype
        return super().get(demangled_name, arch)

    def get_stub(self, name, arch):
        """
        Get a stub procedure for the given function, regardless of if a real implementation is available. This will
        apply any metadata, such as a default calling convention or a function prototype. Demangle the function name
        if it is a mangled C++ name.

        :param str name:    The name of the function as a string
        :param arch:        The architecture to use, as either a string or an archinfo.Arch instance
        :return:            A SimProcedure instance representing a plausible stub as could be found in the library.
        """
        demangled_name = self._try_demangle(name)
        stub = super().get_stub(demangled_name, arch)
        # try to determine a prototype from the function name if possible; this only makes sense for an
        # itanium-mangled name and when the stub actually has a calling convention to attach it to
        if demangled_name != name and stub.cc is not None:
            proto = self._proto_from_demangled_name(demangled_name)
            if proto is not None:
                # skipped when nothing could be derived, leaving whatever _apply_metadata set in place
                stub.cc.set_func_type_with_arch(proto)
            if stub.cc.func_ty is not None and not stub.ARGS_MISMATCH:
                num_args = len(stub.cc.func_ty.args)
                stub.cc.num_args = num_args
                stub.num_args = num_args
        return stub

    def get_prototype(self, name: str, arch=None) -> Optional[SimTypeFunction]:
        """
        Get a prototype of the given function name, optionally specialize the prototype to a given architecture. The
        function name will be demangled first.

        :param name:    Name of the function.
        :param arch:    The architecture to specialize to.
        :return:        Prototype of the function, or None if the prototype does not exist.
        """
        demangled_name = self._try_demangle(name)
        return super().get_prototype(demangled_name, arch=arch)

    def has_metadata(self, name):
        """
        Check if a function has either an implementation or any metadata associated with it. Demangle the function name
        if it is a mangled C++ name.

        :param name:    The name of the function as a string
        :return:        A bool indicating if anything is known about the function
        """
        return super().has_metadata(self._try_demangle(name))

    def has_implementation(self, name):
        """
        Check if a function has an implementation associated with it. Demangle the function name if it is a mangled C++
        name.

        :param str name:    A mangled function name.
        :return:            bool
        """
        return super().has_implementation(self._try_demangle(name))

    def has_prototype(self, func_name):
        """
        Check if a function has a prototype associated with it. Demangle the function name if it is a mangled C++ name.

        :param str func_name:   A mangled function name.
        :return:                bool
        """
        return super().has_prototype(self._try_demangle(func_name))


class SimSyscallLibrary(SimLibrary):
    """
    SimSyscallLibrary is a specialized version of SimLibrary for dealing not with a dynamic library's API but rather
    an operating system's syscall API. Because this interface is inherently lower-level than a dynamic library, many
    parts of this class have been changed to store data based on an "ABI name" (ABI = application binary interface,
    like an API but for when there's no programming language) instead of an architecture. An ABI name is just an
    arbitrary string with which a calling convention and a syscall numbering is associated.

    All the SimLibrary methods for adding functions still work, but now there's an additional layer on top that
    associates them with numbers.
    """
    def __init__(self):
        super().__init__()
        self.syscall_number_mapping: Dict[str, Dict[int, str]] = defaultdict(dict)  # keyed by abi
        self.syscall_name_mapping: Dict[str, Dict[str, int]] = defaultdict(dict)  # keyed by abi
        self.default_cc_mapping: Dict[str, Type['SimCCSyscall']] = {}  # keyed by abi
        self.syscall_prototypes: Dict[str, Dict[str, SimTypeFunction]] = defaultdict(dict)  # keyed by abi
        self.fallback_proc = stub_syscall

    @staticmethod
    def _copy_per_abi(mapping):
        """
        Copy a ``{abi: {key: value}}`` mapping so that neither the outer nor the inner dicts are shared.

        :param mapping: The per-abi mapping to copy
        :return:        A new ``defaultdict(dict)`` holding a copy of each inner dict
        """
        return defaultdict(dict, {abi: dict(entries) for abi, entries in mapping.items()})

    def copy(self):
        """
        Make a copy of this SimSyscallLibrary, allowing it to be mutated without affecting the global version.

        :return:    A new SimSyscallLibrary whose containers, including the per-abi inner dicts, are its own
        """
        o = self._copy_base_state_to(SimSyscallLibrary())
        o.syscall_number_mapping = self._copy_per_abi(self.syscall_number_mapping)  # {abi: {number: name}}
        o.syscall_name_mapping = self._copy_per_abi(self.syscall_name_mapping)  # {abi: {name: number}}
        o.syscall_prototypes = self._copy_per_abi(self.syscall_prototypes)  # {abi: {name: prototype}}
        o.default_cc_mapping = dict(self.default_cc_mapping)  # {abi: cc}
        return o

    def update(self, other):
        """
        Augment this SimSyscallLibrary with the information from another SimSyscallLibrary

        :param other:   The other SimSyscallLibrary
        """
        super().update(other)
        # Merge abi by abi: a plain dict.update() on the outer mapping would throw away the entries this library
        # already has for an abi and leave both libraries sharing one inner dict.
        for abi, numbers in other.syscall_number_mapping.items():
            self.syscall_number_mapping.setdefault(abi, {}).update(numbers)
        for abi, names in other.syscall_name_mapping.items():
            self.syscall_name_mapping.setdefault(abi, {}).update(names)
        for abi, protos in other.syscall_prototypes.items():
            self.syscall_prototypes.setdefault(abi, {}).update(protos)
        self.default_cc_mapping.update(other.default_cc_mapping)

    def minimum_syscall_number(self, abi):
        """
        :param abi: The abi to evaluate
        :return:    The smallest syscall number known for the given abi, or 0 if none are known
        """
        numbers = self.syscall_number_mapping.get(abi)
        return min(numbers) if numbers else 0

    def maximum_syscall_number(self, abi):
        """
        :param abi: The abi to evaluate
        :return:    The largest syscall number known for the given abi, or 0 if none are known
        """
        numbers = self.syscall_number_mapping.get(abi)
        return max(numbers) if numbers else 0

    def add_number_mapping(self, abi, number, name):
        """
        Associate a syscall number with the name of a function present in the underlying SimLibrary

        :param abi:     The abi for which this mapping applies
        :param number:  The syscall number
        :param name:    The name of the function
        """
        self.syscall_number_mapping[abi][number] = name
        self.syscall_name_mapping[abi][name] = number

    def add_number_mapping_from_dict(self, abi, mapping):
        """
        Batch-associate syscall numbers with names of functions present in the underlying SimLibrary

        :param abi:     The abi for which this mapping applies
        :param mapping: A dict mapping syscall numbers to function names
        """
        self.syscall_number_mapping[abi].update(mapping)
        self.syscall_name_mapping[abi].update({name: number for number, name in mapping.items()})

    def set_abi_cc(self, abi, cc_cls):
        """
        Set the default calling convention for an abi

        :param abi:     The name of the abi
        :param cc_cls:  A SimCC _class_, not an instance, that should be used for syscalls using the abi
        """
        self.default_cc_mapping[abi] = cc_cls

    def set_prototype(self, abi: str, name: str, proto: SimTypeFunction) -> None:  # pylint: disable=arguments-differ
        """
        Set the prototype of a function in the form of a SimTypeFunction containing argument and return types

        NOTE(review): the inherited ``set_c_prototype()`` calls ``self.set_prototype(name, proto)`` with two
        arguments, which does not match this signature.

        :param abi:     ABI of the syscall.
        :param name:    The name of the syscall as a string
        :param proto:   The prototype of the syscall as a SimTypeFunction
        """
        self.syscall_prototypes[abi][name] = proto

    def set_prototypes(self, abi: str, protos: Dict[str, SimTypeFunction]) -> None:  # pylint: disable=arguments-differ
        """
        Set the prototypes of many syscalls.

        :param abi:     ABI of the syscalls.
        :param protos:  Dictionary mapping syscall names to SimTypeFunction objects
        """
        self.syscall_prototypes[abi].update(protos)

    def _canonicalize(self, number, arch, abi_list):
        """
        Resolve a syscall number to a name and normalise the architecture.

        :param number:      The syscall number, or a string which is taken to already be the name
        :param arch:        The architecture, as either a string name or an archinfo.Arch
        :param abi_list:    The ABI names to search, in order of preference
        :return:            A tuple of (name, arch, abi); abi is None if ``number`` is a string or no abi knows it
        """
        if isinstance(arch, str):
            arch = archinfo.arch_from_id(arch)
        if isinstance(number, str):
            return number, arch, None
        for abi in abi_list:
            # .get() so that probing an unknown abi does not add an empty entry to the defaultdict
            mapping = self.syscall_number_mapping.get(abi)
            if mapping and number in mapping:
                return mapping[number], arch, abi
        return 'sys_%d' % number, arch, None

    def _apply_numerical_metadata(self, proc, number, arch, abi):
        """
        Attach the syscall number, abi and the abi-specific calling convention and prototype to a procedure.

        :param proc:    The SimProcedure instance to update in place
        :param number:  The syscall number
        :param arch:    The archinfo.Arch instance being worked with
        :param abi:     The abi the number was resolved under, or None
        """
        proc.syscall_number = number
        proc.abi = abi
        if abi in self.default_cc_mapping:
            cc = self.default_cc_mapping[abi](arch)
            if proc.cc is not None:
                # keep the function type that was attached to the previous calling convention
                cc.set_func_type_with_arch(proc.cc.func_ty)
            proc.cc = cc
        # a bit of a hack.
        # .get() on the outer mapping so that an unknown or None abi does not add an entry to the defaultdict
        proto = self.syscall_prototypes.get(abi, {}).get(proc.display_name)
        if proto is not None and proc.cc is not None:
            proc.cc.func_ty = proto.with_arch(arch)

    # pylint: disable=arguments-differ
    def get(self, number, arch, abi_list=()):
        """
        The get() function for SimSyscallLibrary looks a little different from its original version.

        Instead of providing a name, you provide a number, and you additionally provide a list of abi names that are
        applicable. The first abi for which the number is present in the mapping will be chosen. This allows for the
        easy abstractions of architectures like ARM or MIPS linux for which there are many ABIs that can be used at any
        time by using syscall numbers from various ranges. If no abi knows about the number, the stub procedure with
        the name "sys_%d" will be used.

        :param number:      The syscall number
        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
        :param abi_list:    A list of ABI names that could be used
        :return:            A SimProcedure representing the implementation of the given syscall, or a stub if no
                            implementation is available
        """
        name, arch, abi = self._canonicalize(number, arch, abi_list)
        proc = super().get(name, arch)
        proc.is_syscall = True
        self._apply_numerical_metadata(proc, number, arch, abi)
        return proc

    def get_stub(self, number, arch, abi_list=()):
        """
        Pretty much the intersection of SimLibrary.get_stub() and SimSyscallLibrary.get().

        :param number:      The syscall number
        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
        :param abi_list:    A list of ABI names that could be used
        :return:            A SimProcedure representing a plausible stub that could model the syscall
        """
        name, arch, abi = self._canonicalize(number, arch, abi_list)
        proc = super().get_stub(name, arch)
        self._apply_numerical_metadata(proc, number, arch, abi)
        l.debug("unsupported syscall: %s", number)
        return proc

    def get_prototype(self, abi: str, name: str, arch=None) -> Optional[SimTypeFunction]:
        """
        Get a prototype of the given syscall name and its ABI, optionally specialize the prototype to a given
        architecture.

        :param abi:     ABI of the prototype to get.
        :param name:    Name of the syscall.
        :param arch:    The architecture to specialize to.
        :return:        Prototype of the syscall, or None if the prototype does not exist.
        """
        proto = self.syscall_prototypes.get(abi, {}).get(name)
        if proto is None:
            return None
        # only specialize when an architecture was actually given, same as SimLibrary.get_prototype()
        if arch is not None:
            return proto.with_arch(arch)
        return proto

    def has_metadata(self, number, arch, abi_list=()):
        """
        Pretty much the intersection of SimLibrary.has_metadata() and SimSyscallLibrary.get().

        :param number:      The syscall number
        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
        :param abi_list:    A list of ABI names that could be used
        :return:            A bool of whether or not any implementation or metadata is known about the given syscall
        """
        name, _, _ = self._canonicalize(number, arch, abi_list)
        return super().has_metadata(name)

    def has_implementation(self, number, arch, abi_list=()):
        """
        Pretty much the intersection of SimLibrary.has_implementation() and SimSyscallLibrary.get().

        :param number:      The syscall number
        :param arch:        The architecture being worked with, as either a string name or an archinfo.Arch
        :param abi_list:    A list of ABI names that could be used
        :return:            A bool of whether or not an implementation of the syscall is available
        """
        name, _, _ = self._canonicalize(number, arch, abi_list)
        return super().has_implementation(name)

    def has_prototype(self, abi: str, name: str) -> bool:
        """
        Check if a syscall has a prototype associated with it under the given ABI.

        :param abi:     Name of the ABI.
        :param name:    The syscall name.
        :return:        bool
        """
        return name in self.syscall_prototypes.get(abi, ())


for _ in autoimport.auto_import_modules('angr.procedures.definitions', os.path.dirname(os.path.realpath(__file__))):
    pass