1import itertools 2import logging 3import sys 4from collections import defaultdict 5from functools import reduce 6from typing import Dict 7 8import claripy 9import networkx 10import pyvex 11from archinfo import ArchARM 12 13 14from ... import BP, BP_BEFORE, BP_AFTER, SIM_PROCEDURES, procedures 15from ... import options as o 16from ...engines.procedure import ProcedureEngine 17from ...exploration_techniques.loop_seer import LoopSeer 18from ...exploration_techniques.slicecutor import Slicecutor 19from ...exploration_techniques.explorer import Explorer 20from ...exploration_techniques.lengthlimiter import LengthLimiter 21from ...errors import AngrCFGError, AngrError, AngrSkipJobNotice, SimError, SimValueError, SimSolverModeError, \ 22 SimFastPathError, SimIRSBError, AngrExitError, SimEmptyCallStackError 23from ...sim_state import SimState 24from ...state_plugins.callstack import CallStack 25from ...state_plugins.sim_action import SimActionData 26from ...knowledge_plugins.cfg import CFGENode, IndirectJump 27from ...utils.constants import DEFAULT_STATEMENT 28from ..forward_analysis import ForwardAnalysis 29from .cfg_base import CFGBase 30from .cfg_job_base import BlockID, CFGJobBase 31from .cfg_utils import CFGUtils 32 33l = logging.getLogger(name=__name__) 34 35 36class CFGJob(CFGJobBase): 37 def __init__(self, *args, **kwargs): 38 super(CFGJob, self).__init__(*args, **kwargs) 39 40 # 41 # local variables used during analysis 42 # 43 44 if self.jumpkind is None: 45 # load jumpkind from path.state.scratch 46 self.jumpkind = 'Ijk_Boring' if self.state.history.jumpkind is None else \ 47 self.state.history.jumpkind 48 49 self.call_stack_suffix = None 50 self.current_function = None 51 52 self.cfg_node = None 53 self.sim_successors = None 54 self.exception_info = None 55 self.successor_status = None 56 57 self.extra_info = None 58 59 @property 60 def block_id(self): 61 if self._block_id is None: 62 # generate a new block ID 63 self._block_id = CFGEmulated._generate_block_id( 64 self.call_stack.stack_suffix(self._context_sensitivity_level), self.addr, self.is_syscall) 65 return self._block_id 66 67 @property 68 def is_syscall(self): 69 return self.jumpkind is not None and self.jumpkind.startswith('Ijk_Sys') 70 71 def __hash__(self): 72 return hash(self.block_id) 73 74 75class PendingJob: 76 def __init__(self, caller_func_addr, returning_source, state, src_block_id, src_exit_stmt_idx, src_exit_ins_addr, 77 call_stack): 78 """ 79 A PendingJob is whatever will be put into our pending_exit list. A pending exit is an entry that created by the 80 returning of a call or syscall. It is "pending" since we cannot immediately figure out whether this entry will 81 be executed or not. If the corresponding call/syscall intentially doesn't return, then the pending exit will be 82 removed. If the corresponding call/syscall returns, then the pending exit will be removed as well (since a real 83 entry is created from the returning and will be analyzed later). If the corresponding call/syscall might 84 return, but for some reason (for example, an unsupported instruction is met during the analysis) our analysis 85 does not return properly, then the pending exit will be picked up and put into remaining_jobs list. 86 87 :param returning_source: Address of the callee function. It might be None if address of the callee is not 88 resolvable. 89 :param state: The state after returning from the callee function. Of course there is no way to get 90 a precise state without emulating the execution of the callee, but at least we can 91 properly adjust the stack and registers to imitate the real returned state. 92 :param call_stack: A callstack. 93 """ 94 95 self.caller_func_addr = caller_func_addr 96 self.returning_source = returning_source 97 self.state = state 98 self.src_block_id = src_block_id 99 self.src_exit_stmt_idx = src_exit_stmt_idx 100 self.src_exit_ins_addr = src_exit_ins_addr 101 self.call_stack = call_stack 102 103 def __repr__(self): 104 return "<PendingJob to %s, from function %s>" % (self.state.ip, hex( 105 self.returning_source) if self.returning_source is not None else 'Unknown') 106 107 def __hash__(self): 108 return hash((self.caller_func_addr, self.returning_source, self.src_block_id, self.src_exit_stmt_idx, 109 self.src_exit_ins_addr 110 ) 111 ) 112 113 def __eq__(self, other): 114 if not isinstance(other, PendingJob): 115 return False 116 return self.caller_func_addr == other.caller_func_addr and \ 117 self.returning_source == other.returning_source and \ 118 self.src_block_id == other.src_block_id and \ 119 self.src_exit_stmt_idx == other.src_exit_stmt_idx and \ 120 self.src_exit_ins_addr == other.src_exit_ins_addr 121 122 123class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method 124 """ 125 This class represents a control-flow graph. 126 """ 127 128 tag = "CFGEmulated" 129 130 def __init__(self, 131 context_sensitivity_level=1, 132 start=None, 133 avoid_runs=None, 134 enable_function_hints=False, 135 call_depth=None, 136 call_tracing_filter=None, 137 initial_state=None, 138 starts=None, 139 keep_state=False, 140 indirect_jump_target_limit=100000, 141 resolve_indirect_jumps=True, 142 enable_advanced_backward_slicing=False, 143 enable_symbolic_back_traversal=False, 144 indirect_jump_resolvers=None, 145 additional_edges=None, 146 no_construct=False, 147 normalize=False, 148 max_iterations=1, 149 address_whitelist=None, 150 base_graph=None, 151 iropt_level=None, 152 max_steps=None, 153 state_add_options=None, 154 state_remove_options=None, 155 model=None, 156 ): 157 """ 158 All parameters are optional. 159 160 :param context_sensitivity_level: The level of context-sensitivity of this CFG (see documentation for 161 further details). It ranges from 0 to infinity. Default 1. 162 :param avoid_runs: A list of runs to avoid. 163 :param enable_function_hints: Whether to use function hints (constants that might be used as exit 164 targets) or not. 165 :param call_depth: How deep in the call stack to trace. 166 :param call_tracing_filter: Filter to apply on a given path and jumpkind to determine if it 167 should be skipped when call_depth is reached. 168 :param initial_state: An initial state to use to begin analysis. 169 :param iterable starts: A collection of starting points to begin analysis. It can contain 170 the following three different types of entries: an address specified 171 as an integer, a 2-tuple that includes an integer address and a 172 jumpkind, or a SimState instance. Unsupported entries in starts will 173 lead to an AngrCFGError being raised. 174 :param keep_state: Whether to keep the SimStates for each CFGNode. 175 :param resolve_indirect_jumps: Whether to enable the indirect jump resolvers for resolving indirect jumps 176 :param enable_advanced_backward_slicing: Whether to enable an intensive technique for resolving indirect jumps 177 :param enable_symbolic_back_traversal: Whether to enable an intensive technique for resolving indirect jumps 178 :param list indirect_jump_resolvers: A custom list of indirect jump resolvers. If this list is None or empty, 179 default indirect jump resolvers specific to this architecture and binary 180 types will be loaded. 181 :param additional_edges: A dict mapping addresses of basic blocks to addresses of 182 successors to manually include and analyze forward from. 183 :param bool no_construct: Skip the construction procedure. Only used in unit-testing. 184 :param bool normalize: If the CFG as well as all Function graphs should be normalized or 185 not. 186 :param int max_iterations: The maximum number of iterations that each basic block should be 187 "executed". 1 by default. Larger numbers of iterations are usually 188 required for complex analyses like loop analysis. 189 :param iterable address_whitelist: A list of allowed addresses. Any basic blocks outside of this 190 collection of addresses will be ignored. 191 :param networkx.DiGraph base_graph: A basic control flow graph to follow. Each node inside this graph 192 must have the following properties: `addr` and `size`. CFG recovery 193 will strictly follow nodes and edges shown in the graph, and discard 194 any contorl flow that does not follow an existing edge in the base 195 graph. For example, you can pass in a Function local transition 196 graph as the base graph, and CFGEmulated will traverse nodes and 197 edges and extract useful information. 198 :param int iropt_level: The optimization level of VEX IR (0, 1, 2). The default level will 199 be used if `iropt_level` is None. 200 :param int max_steps: The maximum number of basic blocks to recover forthe longest path 201 from each start before pausing the recovery procedure. 202 :param state_add_options: State options that will be added to the initial state. 203 :param state_remove_options: State options that will be removed from the initial state. 204 """ 205 ForwardAnalysis.__init__(self, order_jobs=True if base_graph is not None else False) 206 CFGBase.__init__(self, 'emulated', context_sensitivity_level, normalize=normalize, 207 resolve_indirect_jumps=resolve_indirect_jumps, 208 indirect_jump_resolvers=indirect_jump_resolvers, 209 indirect_jump_target_limit=indirect_jump_target_limit, 210 model=model, 211 ) 212 213 if start is not None: 214 l.warning("`start` is deprecated. Please consider using `starts` instead in your code.") 215 self._starts = (start,) 216 else: 217 if isinstance(starts, (list, set)): 218 self._starts = tuple(starts) 219 elif isinstance(starts, tuple) or starts is None: 220 self._starts = starts 221 else: 222 raise AngrCFGError('Unsupported type of the `starts` argument.') 223 224 if enable_advanced_backward_slicing or enable_symbolic_back_traversal: 225 l.warning("`advanced backward slicing` and `symbolic back traversal` are deprecated.") 226 l.warning("Please use `resolve_indirect_jumps` to resolve indirect jumps using different resolvers instead.") 227 228 self._iropt_level = iropt_level 229 self._avoid_runs = avoid_runs 230 self._enable_function_hints = enable_function_hints 231 self._call_depth = call_depth 232 self._call_tracing_filter = call_tracing_filter 233 self._initial_state = initial_state 234 self._keep_state = keep_state 235 self._advanced_backward_slicing = enable_advanced_backward_slicing 236 self._enable_symbolic_back_traversal = enable_symbolic_back_traversal 237 self._additional_edges = additional_edges if additional_edges else {} 238 self._max_steps = max_steps 239 self._state_add_options = state_add_options if state_add_options is not None else set() 240 self._state_remove_options = state_remove_options if state_remove_options is not None else set() 241 self._state_add_options.update([o.SYMBOL_FILL_UNCONSTRAINED_MEMORY, o.SYMBOL_FILL_UNCONSTRAINED_REGISTERS]) 242 243 # add the track_memory_option if the enable function hint flag is set 244 if self._enable_function_hints and o.TRACK_MEMORY_ACTIONS not in self._state_add_options: 245 self._state_add_options.add(o.TRACK_MEMORY_ACTIONS) 246 247 # more initialization 248 249 self._symbolic_function_initial_state = {} 250 self._function_input_states = None 251 self._unresolvable_runs = set() 252 253 # Stores the index for each CFGNode in this CFG after a quasi-topological sort (currently a DFS) 254 # TODO: remove it since it's no longer used 255 self._quasi_topological_order = {} 256 # A copy of all entry points in this CFG. Integers 257 self._entry_points = [] 258 259 self._max_iterations = max_iterations 260 self._address_whitelist = set(address_whitelist) if address_whitelist is not None else None 261 self._base_graph = base_graph 262 self._node_addr_visiting_order = [ ] 263 264 if self._base_graph: 265 sorted_nodes = CFGUtils.quasi_topological_sort_nodes(self._base_graph) 266 self._node_addr_visiting_order = [ n.addr for n in sorted_nodes ] 267 268 self._sanitize_parameters() 269 270 self._executable_address_ranges = [] 271 self._executable_address_ranges = self._executable_memory_regions() 272 273 # These save input states of functions. It will be discarded after the CFG is constructed 274 self._function_input_states = {} 275 self._loop_back_edges = [] 276 self._overlapped_loop_headers = [] 277 self._pending_function_hints = set() 278 # A dict to log edges and the jumpkind between each basic block 279 self._edge_map = defaultdict(list) 280 281 self._model._iropt_level = self._iropt_level 282 283 self._start_keys = [ ] # a list of block IDs of all starts 284 285 # For each call, we are always getting two exits: an Ijk_Call that 286 # stands for the real call exit, and an Ijk_Ret that is a simulated exit 287 # for the retn address. There are certain cases that the control flow 288 # never returns to the next instruction of a callsite due to 289 # imprecision of the concrete execution. So we save those simulated 290 # exits here to increase our code coverage. Of course the real retn from 291 # that call always precedes those "fake" retns. 292 self._pending_jobs = defaultdict(list) # Dict[BlockID, List[PendingJob]] 293 294 # Counting how many times a basic block is traced into 295 self._traced_addrs = defaultdict(lambda: defaultdict(int)) 296 297 # A dict that collects essential parameters to properly reconstruct initial state for a block 298 self._block_artifacts = {} 299 self._analyzed_addrs = set() 300 self._non_returning_functions = set() 301 302 self._pending_edges = defaultdict(list) 303 304 if not no_construct: 305 self._initialize_cfg() 306 self._analyze() 307 308 # 309 # Public methods 310 # 311 312 def copy(self): 313 """ 314 Make a copy of the CFG. 315 316 :return: A copy of the CFG instance. 317 :rtype: angr.analyses.CFG 318 """ 319 new_cfg = CFGEmulated.__new__(CFGEmulated) 320 super(CFGEmulated, self).make_copy(new_cfg) 321 322 new_cfg._indirect_jump_target_limit = self._indirect_jump_target_limit 323 new_cfg.named_errors = dict(self.named_errors) 324 new_cfg.errors = list(self.errors) 325 new_cfg._fail_fast = self._fail_fast 326 new_cfg._max_steps = self._max_steps 327 new_cfg.project = self.project 328 329 # Intelligently (or stupidly... you tell me) fill it up 330 new_cfg._edge_map = self._edge_map.copy() 331 new_cfg._loop_back_edges = self._loop_back_edges[::] 332 new_cfg._executable_address_ranges = self._executable_address_ranges[::] 333 new_cfg._unresolvable_runs = self._unresolvable_runs.copy() 334 new_cfg._overlapped_loop_headers = self._overlapped_loop_headers[::] 335 new_cfg._thumb_addrs = self._thumb_addrs.copy() 336 new_cfg._keep_state = self._keep_state 337 338 return new_cfg 339 340 def resume(self, starts=None, max_steps=None): 341 """ 342 Resume a paused or terminated control flow graph recovery. 343 344 :param iterable starts: A collection of new starts to resume from. If `starts` is None, we will resume CFG 345 recovery from where it was paused before. 346 :param int max_steps: The maximum number of blocks on the longest path starting from each start before pausing 347 the recovery. 348 :return: None 349 """ 350 351 self._should_abort = False 352 353 self._starts = starts 354 self._max_steps = max_steps 355 356 if self._starts is None: 357 self._starts = [ ] 358 359 if self._starts: 360 self._sanitize_starts() 361 362 self._analyze() 363 364 def remove_cycles(self): 365 """ 366 Forces graph to become acyclic, removes all loop back edges and edges between overlapped loop headers and their 367 successors. 368 """ 369 # loop detection 370 # only detect loops after potential graph normalization 371 if not self._loop_back_edges: 372 l.debug("Detecting loops...") 373 self._detect_loops() 374 375 l.debug("Removing cycles...") 376 l.debug("There are %d loop back edges.", len(self._loop_back_edges)) 377 l.debug("And there are %d overlapping loop headers.", len(self._overlapped_loop_headers)) 378 # First break all detected loops 379 for b1, b2 in self._loop_back_edges: 380 if self._graph.has_edge(b1, b2): 381 l.debug("Removing loop back edge %s -> %s", b1, b2) 382 self._graph.remove_edge(b1, b2) 383 # Then remove all outedges from overlapped loop headers 384 for b in self._overlapped_loop_headers: 385 successors = self._graph.successors(b) 386 for succ in successors: 387 self._graph.remove_edge(b, succ) 388 l.debug("Removing partial loop header edge %s -> %s", b, succ) 389 390 def downsize(self): 391 """ 392 Remove saved states from all CFGNodes to reduce memory usage. 393 394 :return: None 395 """ 396 397 for cfg_node in self._nodes.values(): 398 cfg_node.downsize() 399 400 def unroll_loops(self, max_loop_unrolling_times): 401 """ 402 Unroll loops for each function. The resulting CFG may still contain loops due to recursion, function calls, etc. 403 404 :param int max_loop_unrolling_times: The maximum iterations of unrolling. 405 :return: None 406 """ 407 408 if not isinstance(max_loop_unrolling_times, int) or \ 409 max_loop_unrolling_times < 0: 410 raise AngrCFGError('Max loop unrolling times must be set to an integer greater than or equal to 0 if ' + 411 'loop unrolling is enabled.') 412 413 def _unroll(graph, loop): 414 """ 415 The loop callback method where loops are unrolled. 416 417 :param networkx.DiGraph graph: The control flow graph. 418 :param angr.analyses.loopfinder.Loop loop: The loop instance. 419 :return: None 420 """ 421 422 for back_edge in loop.continue_edges: 423 loop_body_addrs = {n.addr for n in loop.body_nodes} 424 src_blocknode = back_edge[0] # type: angr.knowledge.codenode.BlockNode 425 dst_blocknode = back_edge[1] # type: angr.knowledge.codenode.BlockNode 426 427 for src in self.get_all_nodes(src_blocknode.addr): 428 for dst in graph.successors(src): 429 if dst.addr != dst_blocknode.addr: 430 continue 431 432 # Duplicate the dst node 433 new_dst = dst.copy() 434 new_dst.looping_times = dst.looping_times + 1 435 if (new_dst not in graph and 436 # If the new_dst is already in the graph, we don't want to keep unrolling 437 # the this loop anymore since it may *create* a new loop. Of course we 438 # will lose some edges in this way, but in general it is acceptable. 439 new_dst.looping_times <= max_loop_unrolling_times 440 ): 441 # Log all successors of the dst node 442 dst_successors = graph.successors(dst) 443 # Add new_dst to the graph 444 edge_data = graph.get_edge_data(src, dst) 445 graph.add_edge(src, new_dst, **edge_data) 446 for ds in dst_successors: 447 if ds.looping_times == 0 and ds.addr not in loop_body_addrs: 448 edge_data = graph.get_edge_data(dst, ds) 449 graph.add_edge(new_dst, ds, **edge_data) 450 451 graph.remove_edge(src, dst) 452 453 self._detect_loops(loop_callback=_unroll) 454 455 def force_unroll_loops(self, max_loop_unrolling_times): 456 """ 457 Unroll loops globally. The resulting CFG does not contain any loop, but this method is slow on large graphs. 458 459 :param int max_loop_unrolling_times: The maximum iterations of unrolling. 460 :return: None 461 """ 462 463 if not isinstance(max_loop_unrolling_times, int) or \ 464 max_loop_unrolling_times < 0: 465 raise AngrCFGError('Max loop unrolling times must be set to an integer greater than or equal to 0 if ' + 466 'loop unrolling is enabled.') 467 468 # Traverse the CFG and try to find the beginning of loops 469 loop_backedges = [] 470 471 start = self._starts[0] 472 if isinstance(start, tuple): 473 start, _ = start # pylint: disable=unpacking-non-sequence 474 start_node = self.get_any_node(start) 475 if start_node is None: 476 raise AngrCFGError('Cannot find start node when trying to unroll loops. The CFG might be empty.') 477 478 graph_copy = networkx.DiGraph(self.graph) 479 480 while True: 481 cycles_iter = networkx.simple_cycles(graph_copy) 482 try: 483 cycle = next(cycles_iter) 484 except StopIteration: 485 break 486 487 loop_backedge = (None, None) 488 489 for n in networkx.dfs_preorder_nodes(graph_copy, source=start_node): 490 if n in cycle: 491 idx = cycle.index(n) 492 if idx == 0: 493 loop_backedge = (cycle[-1], cycle[idx]) 494 else: 495 loop_backedge = (cycle[idx - 1], cycle[idx]) 496 break 497 498 if loop_backedge not in loop_backedges: 499 loop_backedges.append(loop_backedge) 500 501 # Create a common end node for all nodes whose out_degree is 0 502 end_nodes = [n for n in graph_copy.nodes() if graph_copy.out_degree(n) == 0] 503 new_end_node = "end_node" 504 505 if not end_nodes: 506 # We gotta randomly break a loop 507 cycles = sorted(networkx.simple_cycles(graph_copy), key=len) 508 first_cycle = cycles[0] 509 if len(first_cycle) == 1: 510 graph_copy.remove_edge(first_cycle[0], first_cycle[0]) 511 else: 512 graph_copy.remove_edge(first_cycle[0], first_cycle[1]) 513 end_nodes = [n for n in graph_copy.nodes() if graph_copy.out_degree(n) == 0] 514 515 for en in end_nodes: 516 graph_copy.add_edge(en, new_end_node) 517 518 # postdoms = self.immediate_postdominators(new_end_node, target_graph=graph_copy) 519 # reverse_postdoms = defaultdict(list) 520 # for k, v in postdoms.items(): 521 # reverse_postdoms[v].append(k) 522 523 # Find all loop bodies 524 # for src, dst in loop_backedges: 525 # nodes_in_loop = { src, dst } 526 527 # while True: 528 # new_nodes = set() 529 530 # for n in nodes_in_loop: 531 # if n in reverse_postdoms: 532 # for node in reverse_postdoms[n]: 533 # if node not in nodes_in_loop: 534 # new_nodes.add(node) 535 536 # if not new_nodes: 537 # break 538 539 # nodes_in_loop |= new_nodes 540 541 # Unroll the loop body 542 # TODO: Finish the implementation 543 544 graph_copy.remove_node(new_end_node) 545 src, dst = loop_backedge 546 if graph_copy.has_edge(src, dst): # It might have been removed before 547 # Duplicate the dst node 548 new_dst = dst.copy() 549 new_dst.looping_times = dst.looping_times + 1 550 if ( 551 new_dst not in graph_copy and 552 # If the new_dst is already in the graph, we don't want to keep unrolling 553 # the this loop anymore since it may *create* a new loop. Of course we 554 # will lose some edges in this way, but in general it is acceptable. 555 new_dst.looping_times <= max_loop_unrolling_times): 556 # Log all successors of the dst node 557 dst_successors = list(graph_copy.successors(dst)) 558 # Add new_dst to the graph 559 edge_data = graph_copy.get_edge_data(src, dst) 560 graph_copy.add_edge(src, new_dst, **edge_data) 561 for ds in dst_successors: 562 if ds.looping_times == 0 and ds not in cycle: 563 edge_data = graph_copy.get_edge_data(dst, ds) 564 graph_copy.add_edge(new_dst, ds, **edge_data) 565 # Remove the original edge 566 graph_copy.remove_edge(src, dst) 567 568 # Update loop backedges 569 self._loop_back_edges = loop_backedges 570 571 self.model.graph = graph_copy 572 573 def immediate_dominators(self, start, target_graph=None): 574 """ 575 Get all immediate dominators of sub graph from given node upwards. 576 577 :param str start: id of the node to navigate forwards from. 578 :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph. 579 580 :return: each node of graph as index values, with element as respective node's immediate dominator. 581 :rtype: dict 582 """ 583 return self._immediate_dominators(start, target_graph=target_graph, reverse_graph=False) 584 585 def immediate_postdominators(self, end, target_graph=None): 586 """ 587 Get all immediate postdominators of sub graph from given node upwards. 588 589 :param str start: id of the node to navigate forwards from. 590 :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph. 591 592 :return: each node of graph as index values, with element as respective node's immediate dominator. 593 :rtype: dict 594 """ 595 return self._immediate_dominators(end, target_graph=target_graph, reverse_graph=True) 596 597 def remove_fakerets(self): 598 """ 599 Get rid of fake returns (i.e., Ijk_FakeRet edges) from this CFG 600 601 :return: None 602 """ 603 fakeret_edges = [ (src, dst) for src, dst, data in self.graph.edges(data=True) 604 if data['jumpkind'] == 'Ijk_FakeRet' ] 605 self.graph.remove_edges_from(fakeret_edges) 606 607 def get_topological_order(self, cfg_node): 608 """ 609 Get the topological order of a CFG Node. 610 611 :param cfg_node: A CFGNode instance. 612 :return: An integer representing its order, or None if the CFGNode does not exist in the graph. 613 """ 614 615 if not self._quasi_topological_order: 616 self._quasi_topological_sort() 617 618 return self._quasi_topological_order.get(cfg_node, None) 619 620 def get_subgraph(self, starting_node, block_addresses): 621 """ 622 Get a sub-graph out of a bunch of basic block addresses. 623 624 :param CFGNode starting_node: The beginning of the subgraph 625 :param iterable block_addresses: A collection of block addresses that should be included in the subgraph if 626 there is a path between `starting_node` and a CFGNode with the specified 627 address, and all nodes on the path should also be included in the subgraph. 628 :return: A new CFG that only contain the specific subgraph. 629 :rtype: CFGEmulated 630 """ 631 632 graph = networkx.DiGraph() 633 634 if starting_node not in self.graph: 635 raise AngrCFGError('get_subgraph(): the specified "starting_node" %s does not exist in the current CFG.' 636 % starting_node 637 ) 638 639 addr_set = set(block_addresses) 640 641 graph.add_node(starting_node) 642 queue = [ starting_node ] 643 644 while queue: 645 node = queue.pop() 646 for _, dst, data in self.graph.out_edges([node], data=True): 647 if dst not in graph and dst.addr in addr_set: 648 graph.add_edge(node, dst, **data) 649 queue.append(dst) 650 651 cfg = self.copy() 652 cfg._graph = graph 653 cfg._starts = (starting_node.addr, ) 654 655 return cfg 656 657 def get_function_subgraph(self, start, max_call_depth=None): 658 """ 659 Get a sub-graph of a certain function. 660 661 :param start: The function start. Currently it should be an integer. 662 :param max_call_depth: Call depth limit. None indicates no limit. 663 :return: A CFG instance which is a sub-graph of self.graph 664 """ 665 666 # FIXME: syscalls are not supported 667 # FIXME: start should also take a CFGNode instance 668 669 start_node = self.get_any_node(start) 670 671 node_wrapper = (start_node, 0) 672 stack = [node_wrapper] 673 traversed_nodes = {start_node} 674 subgraph_nodes = set([start_node]) 675 676 while stack: 677 nw = stack.pop() 678 n, call_depth = nw[0], nw[1] 679 680 # Get successors 681 edges = self.graph.out_edges(n, data=True) 682 683 for _, dst, data in edges: 684 if dst not in traversed_nodes: 685 # We see a new node! 686 traversed_nodes.add(dst) 687 688 if data['jumpkind'] == 'Ijk_Call': 689 if max_call_depth is None or (max_call_depth is not None and call_depth < max_call_depth): 690 subgraph_nodes.add(dst) 691 new_nw = (dst, call_depth + 1) 692 stack.append(new_nw) 693 elif data['jumpkind'] == 'Ijk_Ret': 694 if call_depth > 0: 695 subgraph_nodes.add(dst) 696 new_nw = (dst, call_depth - 1) 697 stack.append(new_nw) 698 else: 699 subgraph_nodes.add(dst) 700 new_nw = (dst, call_depth) 701 stack.append(new_nw) 702 703 #subgraph = networkx.subgraph(self.graph, subgraph_nodes) 704 subgraph = self.graph.subgraph(subgraph_nodes).copy() 705 706 # Make it a CFG instance 707 subcfg = self.copy() 708 subcfg._graph = subgraph 709 subcfg._starts = (start,) 710 711 return subcfg 712 713 @property 714 def context_sensitivity_level(self): 715 return self._context_sensitivity_level 716 717 # 718 # Serialization 719 # 720 721 def __setstate__(self, s): 722 self.project = s['project'] 723 self.indirect_jumps: Dict[int,IndirectJump] = s['indirect_jumps'] 724 self._loop_back_edges = s['_loop_back_edges'] 725 self._thumb_addrs = s['_thumb_addrs'] 726 self._unresolvable_runs = s['_unresolvable_runs'] 727 self._executable_address_ranges = s['_executable_address_ranges'] 728 self._iropt_level = s['_iropt_level'] 729 self._model = s['_model'] 730 731 def __getstate__(self): 732 s = { 733 'project': self.project, 734 "indirect_jumps": self.indirect_jumps, 735 '_loop_back_edges': self._loop_back_edges, 736 '_nodes_by_addr': self._nodes_by_addr, 737 '_thumb_addrs': self._thumb_addrs, 738 '_unresolvable_runs': self._unresolvable_runs, 739 '_executable_address_ranges': self._executable_address_ranges, 740 '_iropt_level': self._iropt_level, 741 '_model': self._model 742 } 743 744 return s 745 746 # 747 # Properties 748 # 749 750 @property 751 def graph(self): 752 return self._model.graph 753 754 @property 755 def unresolvables(self): 756 """ 757 Get those SimRuns that have non-resolvable exits. 758 759 :return: A set of SimRuns 760 :rtype: set 761 """ 762 return self._unresolvable_runs 763 764 @property 765 def deadends(self): 766 """ 767 Get all CFGNodes that has an out-degree of 0 768 769 :return: A list of CFGNode instances 770 :rtype: list 771 """ 772 if self.graph is None: 773 raise AngrCFGError('CFG hasn\'t been generated yet.') 774 775 deadends = [i for i in self.graph if self.graph.out_degree(i) == 0] 776 777 return deadends 778 779 # 780 # Private methods 781 # 782 783 # Initialization related methods 784 785 def _sanitize_parameters(self): 786 """ 787 Perform a sanity check on parameters passed in to CFG.__init__(). 788 An AngrCFGError is raised if any parameter fails the sanity check. 789 790 :return: None 791 """ 792 793 # Check additional_edges 794 if isinstance(self._additional_edges, (list, set, tuple)): 795 new_dict = defaultdict(list) 796 for s, d in self._additional_edges: 797 new_dict[s].append(d) 798 self._additional_edges = new_dict 799 800 elif isinstance(self._additional_edges, dict): 801 pass 802 803 else: 804 raise AngrCFGError('Additional edges can only be a list, set, tuple, or a dict.') 805 806 # Check _advanced_backward_slicing 807 if self._advanced_backward_slicing and self._enable_symbolic_back_traversal: 808 raise AngrCFGError('Advanced backward slicing and symbolic back traversal cannot both be enabled.') 809 810 if self._advanced_backward_slicing and not self._keep_state: 811 raise AngrCFGError('Keep state must be enabled if advanced backward slicing is enabled.') 812 813 # Sanitize avoid_runs 814 self._avoid_runs = [ ] if self._avoid_runs is None else self._avoid_runs 815 if not isinstance(self._avoid_runs, (list, set)): 816 raise AngrCFGError('"avoid_runs" must either be None, or a list or a set.') 817 818 self._sanitize_starts() 819 820 def _sanitize_starts(self): 821 # Sanitize starts 822 # Convert self._starts to a list of SimState instances or tuples of (ip, jumpkind) 823 if self._starts is None: 824 self._starts = ((self.project.entry, None),) 825 826 else: 827 new_starts = [ ] 828 for item in self._starts: 829 if isinstance(item, tuple): 830 if len(item) != 2: 831 raise AngrCFGError('Unsupported item in "starts": %s' % str(item)) 832 833 new_starts.append(item) 834 elif isinstance(item, int): 835 new_starts.append((item, None)) 836 837 elif isinstance(item, SimState): 838 new_starts.append(item) 839 840 else: 841 raise AngrCFGError('Unsupported item type in "starts": %s' % type(item)) 842 843 self._starts = new_starts 844 845 if not self._starts: 846 raise AngrCFGError("At least one start must be provided") 847 848 # CFG construction 849 # The main loop and sub-methods 850 851 def _job_key(self, job): 852 """ 853 Get the key for a specific CFG job. The key is a context-sensitive block ID. 854 855 :param CFGJob job: The CFGJob instance. 856 :return: The block ID of the specific CFG job. 857 :rtype: BlockID 858 """ 859 860 return job.block_id 861 862 def _job_sorting_key(self, job): 863 """ 864 Get the sorting key of a CFGJob instance. 865 866 :param CFGJob job: the CFGJob object. 867 :return: An integer that determines the order of this job in the queue. 868 :rtype: int 869 """ 870 871 if self._base_graph is None: 872 # we don't do sorting if there is no base_graph 873 return 0 874 875 MAX_JOBS = 1000000 876 877 if job.addr not in self._node_addr_visiting_order: 878 return MAX_JOBS 879 880 return self._node_addr_visiting_order.index(job.addr) 881 882 def _pre_analysis(self): 883 """ 884 Initialization work. Executed prior to the analysis. 885 886 :return: None 887 """ 888 889 # Fill up self._starts 890 for item in self._starts: 891 callstack = None 892 if isinstance(item, tuple): 893 # (addr, jumpkind) 894 ip = item[0] 895 state = self._create_initial_state(item[0], item[1]) 896 897 elif isinstance(item, SimState): 898 # SimState 899 state = item.copy() # pylint: disable=no-member 900 ip = state.solver.eval_one(state.ip) 901 self._reset_state_mode(state, 'fastpath') 902 903 else: 904 raise AngrCFGError('Unsupported CFG start type: %s.' % str(type(item))) 905 906 self._symbolic_function_initial_state[ip] = state 907 path_wrapper = CFGJob(ip, state, self._context_sensitivity_level, None, None, call_stack=callstack) 908 key = path_wrapper.block_id 909 if key not in self._start_keys: 910 self._start_keys.append(key) 911 912 self._insert_job(path_wrapper) 913 self._register_analysis_job(path_wrapper.func_addr, path_wrapper) 914 915 def _intra_analysis(self): 916 """ 917 During the analysis. We process function hints here. 918 919 :return: None 920 """ 921 922 # TODO: Why was the former two conditions there in the first place? 923 if self._pending_function_hints: 924 self._process_hints(self._analyzed_addrs) 925 926 def _job_queue_empty(self): 927 """ 928 A callback method called when the job queue is empty. 929 930 :return: None 931 """ 932 933 self._iteratively_clean_pending_exits() 934 935 while self._pending_jobs: 936 # We don't have any exits remaining. Let's pop out a pending exit 937 pending_job = self._get_one_pending_job() 938 if pending_job is None: 939 continue 940 941 self._insert_job(pending_job) 942 self._register_analysis_job(pending_job.func_addr, pending_job) 943 break 944 945 def _create_initial_state(self, ip, jumpkind): 946 """ 947 Obtain a SimState object for a specific address 948 949 Fastpath means the CFG generation will work in an IDA-like way, in which it will not try to execute every 950 single statement in the emulator, but will just do the decoding job. This is much faster than the old way. 951 952 :param int ip: The instruction pointer 953 :param str jumpkind: The jumpkind upon executing the block 954 :return: The newly-generated state 955 :rtype: SimState 956 """ 957 958 jumpkind = "Ijk_Boring" if jumpkind is None else jumpkind 959 960 if self._initial_state is None: 961 state = self.project.factory.blank_state(addr=ip, mode="fastpath", 962 add_options=self._state_add_options, 963 remove_options=self._state_remove_options, 964 ) 965 self._initial_state = state 966 else: 967 # FIXME: self._initial_state is deprecated. This branch will be removed soon 968 state = self._initial_state 969 state.history.jumpkind = jumpkind 970 self._reset_state_mode(state, 'fastpath') 971 state._ip = state.solver.BVV(ip, self.project.arch.bits) 972 973 if jumpkind is not None: 974 state.history.jumpkind = jumpkind 975 976 # THIS IS A HACK FOR MIPS 977 if ip is not None and self.project.arch.name in ('MIPS32', 'MIPS64'): 978 # We assume this is a function start 979 state.regs.t9 = ip 980 # TODO there was at one point special logic for the ppc64 table of contents but it seems to have bitrotted 981 982 return state 983 984 def _get_one_pending_job(self): 985 """ 986 Retrieve a pending job. 987 988 :return: A CFGJob instance or None 989 """ 990 991 pending_job_key = next(iter(self._pending_jobs.keys())) 992 pending_job = self._pending_jobs[pending_job_key].pop() 993 if len(self._pending_jobs[pending_job_key]) == 0: 994 del self._pending_jobs[pending_job_key] 995 996 pending_job_state = pending_job.state 997 pending_job_call_stack = pending_job.call_stack 998 pending_job_src_block_id = pending_job.src_block_id 999 pending_job_src_exit_stmt_idx = pending_job.src_exit_stmt_idx 1000 1001 self._deregister_analysis_job(pending_job.caller_func_addr, pending_job) 1002 1003 # Let's check whether this address has been traced before. 1004 if pending_job_key in self._nodes: 1005 node = self._nodes[pending_job_key] 1006 if node in self.graph: 1007 pending_exit_addr = self._block_id_addr(pending_job_key) 1008 # That block has been traced before. Let's forget about it 1009 l.debug("Target 0x%08x has been traced before. Trying the next one...", pending_exit_addr) 1010 1011 # However, we should still create the FakeRet edge 1012 self._graph_add_edge(pending_job_src_block_id, pending_job_key, jumpkind="Ijk_FakeRet", 1013 stmt_idx=pending_job_src_exit_stmt_idx, ins_addr=pending_job.src_exit_ins_addr) 1014 1015 return None 1016 1017 pending_job_state.history.jumpkind = 'Ijk_FakeRet' 1018 1019 job = CFGJob(pending_job_state.addr, 1020 pending_job_state, 1021 self._context_sensitivity_level, 1022 src_block_id=pending_job_src_block_id, 1023 src_exit_stmt_idx=pending_job_src_exit_stmt_idx, 1024 src_ins_addr=pending_job.src_exit_ins_addr, 1025 call_stack=pending_job_call_stack, 1026 ) 1027 l.debug("Tracing a missing return exit %s", self._block_id_repr(pending_job_key)) 1028 1029 return job 1030 1031 def _process_hints(self, analyzed_addrs): 1032 """ 1033 Process function hints in the binary. 1034 1035 :return: None 1036 """ 1037 1038 # Function hints! 1039 # Now let's see how many new functions we can get here... 1040 while self._pending_function_hints: 1041 f = self._pending_function_hints.pop() 1042 if f not in analyzed_addrs: 1043 new_state = self.project.factory.entry_state(mode='fastpath') 1044 new_state.ip = new_state.solver.BVV(f, self.project.arch.bits) 1045 1046 # TOOD: Specially for MIPS 1047 if new_state.arch.name in ('MIPS32', 'MIPS64'): 1048 # Properly set t9 1049 new_state.registers.store('t9', f) 1050 1051 new_path_wrapper = CFGJob(f, 1052 new_state, 1053 self._context_sensitivity_level 1054 ) 1055 self._insert_job(new_path_wrapper) 1056 self._register_analysis_job(f, new_path_wrapper) 1057 l.debug('Picking a function 0x%x from pending function hints.', f) 1058 self.kb.functions.function(new_path_wrapper.func_addr, create=True) 1059 break 1060 1061 def _post_analysis(self): 1062 """ 1063 Post-CFG-construction. 1064 1065 :return: None 1066 """ 1067 1068 self._make_completed_functions() 1069 new_changes = self._iteratively_analyze_function_features() 1070 functions_do_not_return = new_changes['functions_do_not_return'] 1071 self._update_function_callsites(functions_do_not_return) 1072 1073 # Create all pending edges 1074 for _, edges in self._pending_edges.items(): 1075 for src_node, dst_node, data in edges: 1076 self._graph_add_edge(src_node, dst_node, **data) 1077 1078 # Remove those edges that will never be taken! 1079 self._remove_non_return_edges() 1080 1081 CFGBase._post_analysis(self) 1082 1083 # Job handling 1084 1085 def _pre_job_handling(self, job): # pylint:disable=arguments-differ 1086 """ 1087 Before processing a CFGJob. 1088 Right now each block is traced at most once. If it is traced more than once, we will mark it as "should_skip" 1089 before tracing it. 1090 An AngrForwardAnalysisSkipJob exception is raised in order to skip analyzing the job. 1091 1092 :param CFGJob job: The CFG job object. 1093 :param dict _locals: A bunch of local variables that will be kept around when handling this job and its 1094 corresponding successors. 1095 :return: None 1096 """ 1097 1098 # Extract initial info the CFGJob 1099 job.call_stack_suffix = job.get_call_stack_suffix() 1100 job.current_function = self.kb.functions.function(job.func_addr, create=True, 1101 syscall=job.jumpkind.startswith("Ijk_Sys")) 1102 src_block_id = job.src_block_id 1103 src_exit_stmt_idx = job.src_exit_stmt_idx 1104 src_ins_addr = job.src_ins_addr 1105 addr = job.addr 1106 1107 # Log this address 1108 if l.level == logging.DEBUG: 1109 self._analyzed_addrs.add(addr) 1110 1111 if addr == job.func_addr: 1112 # Store the input state of this function 1113 self._function_input_states[job.func_addr] = job.state 1114 1115 # deregister this job 1116 self._deregister_analysis_job(job.func_addr, job) 1117 1118 # Generate a unique key for this job 1119 block_id = job.block_id 1120 1121 # Should we skip tracing this block? 1122 should_skip = False 1123 if self._traced_addrs[job.call_stack_suffix][addr] >= self._max_iterations: 1124 should_skip = True 1125 elif self._is_call_jumpkind(job.jumpkind) and \ 1126 self._call_depth is not None and \ 1127 len(job.call_stack) > self._call_depth and \ 1128 (self._call_tracing_filter is None or self._call_tracing_filter(job.state, job.jumpkind)): 1129 should_skip = True 1130 1131 # SimInspect breakpoints support 1132 job.state._inspect('cfg_handle_job', BP_BEFORE) 1133 1134 # Get a SimSuccessors out of current job 1135 sim_successors, exception_info, _ = self._get_simsuccessors(addr, job, current_function_addr=job.func_addr) 1136 1137 # determine the depth of this basic block 1138 if self._max_steps is None: 1139 # it's unnecessary to track depth when we are not limiting max_steps 1140 depth = None 1141 else: 1142 if src_block_id is None: 1143 # oh this is the very first basic block on this path 1144 depth = 0 1145 else: 1146 src_cfgnode = self._nodes[src_block_id] 1147 depth = src_cfgnode.depth + 1 1148 # the depth will not be updated later on even if this block has a greater depth on another path. 1149 # consequently, the `max_steps` limit is not veyr precise - I didn't see a need to make it precise 1150 # though. 1151 1152 if block_id not in self._nodes: 1153 # Create the CFGNode object 1154 cfg_node = self._create_cfgnode(sim_successors, job.call_stack, job.func_addr, block_id=block_id, 1155 depth=depth 1156 ) 1157 1158 self._nodes[block_id] = cfg_node 1159 self._nodes_by_addr[cfg_node.addr].append(cfg_node) 1160 1161 else: 1162 # each block_id should only correspond to one CFGNode 1163 # use the existing CFGNode object 1164 # note that since we reuse existing CFGNodes, we may miss the following cases: 1165 # 1166 # mov eax, label_0 mov ebx, label_1 1167 # jmp xxx jmp xxx 1168 # | | 1169 # | | 1170 # *----------------------* 1171 # | 1172 # call eax 1173 # 1174 # Since the basic block "call eax" will only be traced once, either label_0 or label_1 will be missed in 1175 # this case. Indirect jump resolution might be able to get it, but that's another story. Ideally, 1176 # "call eax" should be traced twice with *different* sim_successors keys, which requires block ID being flow 1177 # sensitive, but it is way too expensive. 1178 1179 cfg_node = self._nodes[block_id] 1180 1181 # Increment tracing count for this block 1182 self._traced_addrs[job.call_stack_suffix][addr] += 1 1183 1184 if self._keep_state: 1185 # TODO: if we are reusing an existing CFGNode, we will be overwriting the original input state here. we 1186 # TODO: should save them all, which, unfortunately, requires some redesigning :-( 1187 cfg_node.input_state = sim_successors.initial_state 1188 1189 # See if this job cancels another FakeRet 1190 # This should be done regardless of whether this job should be skipped or not, otherwise edges will go missing 1191 # in the CFG or function transition graphs. 1192 if job.jumpkind == 'Ijk_FakeRet' or \ 1193 (job.jumpkind == 'Ijk_Ret' and block_id in self._pending_jobs): 1194 # The fake ret is confirmed (since we are returning from the function it calls). Create an edge for it 1195 # in the graph. 1196 1197 the_jobs = [ ] 1198 if block_id in self._pending_jobs: 1199 the_jobs = self._pending_jobs.pop(block_id) 1200 for the_job in the_jobs: # type: Pendingjob 1201 self._deregister_analysis_job(the_job.caller_func_addr, the_job) 1202 else: 1203 the_jobs = [job] 1204 1205 for the_job in the_jobs: 1206 self._graph_add_edge(the_job.src_block_id, block_id, 1207 jumpkind='Ijk_FakeRet', 1208 stmt_idx=the_job.src_exit_stmt_idx, 1209 ins_addr=src_ins_addr 1210 ) 1211 self._update_function_transition_graph(the_job.src_block_id, block_id, 1212 jumpkind='Ijk_FakeRet', 1213 ins_addr=src_ins_addr, 1214 stmt_idx=the_job.src_exit_stmt_idx, 1215 confirmed=True 1216 ) 1217 1218 if sim_successors is None or should_skip: 1219 # We cannot retrieve the block, or we should skip the analysis of this node 1220 # But we create the edge anyway. If the sim_successors does not exist, it will be an edge from the previous 1221 # node to a PathTerminator 1222 self._graph_add_edge(src_block_id, block_id, 1223 jumpkind=job.jumpkind, 1224 stmt_idx=src_exit_stmt_idx, 1225 ins_addr=src_ins_addr 1226 ) 1227 self._update_function_transition_graph(src_block_id, block_id, 1228 jumpkind=job.jumpkind, 1229 ins_addr=src_ins_addr, 1230 stmt_idx=src_exit_stmt_idx) 1231 1232 # We are good. Raise the exception and leave 1233 raise AngrSkipJobNotice() 1234 1235 self._update_thumb_addrs(sim_successors, job.state) 1236 1237 # We store the function hints first. Function hints will be checked at the end of the analysis to avoid 1238 # any duplication with existing jumping targets 1239 if self._enable_function_hints: 1240 if sim_successors.sort == 'IRSB' and sim_successors.all_successors: 1241 function_hints = self._search_for_function_hints(sim_successors.all_successors[0]) 1242 for f in function_hints: 1243 self._pending_function_hints.add(f) 1244 1245 self._graph_add_edge(src_block_id, block_id, jumpkind=job.jumpkind, stmt_idx=src_exit_stmt_idx, 1246 ins_addr=src_ins_addr) 1247 self._update_function_transition_graph(src_block_id, block_id, jumpkind=job.jumpkind, 1248 ins_addr=src_ins_addr, stmt_idx=src_exit_stmt_idx) 1249 1250 if block_id in self._pending_edges: 1251 # there are some edges waiting to be created. do it here. 1252 for src_key, dst_key, data in self._pending_edges[block_id]: 1253 self._graph_add_edge(src_key, dst_key, **data) 1254 del self._pending_edges[block_id] 1255 1256 block_info = {reg: sim_successors.initial_state.registers.load(reg) for reg in self.project.arch.persistent_regs} 1257 self._block_artifacts[addr] = block_info 1258 1259 job.cfg_node = cfg_node 1260 job.sim_successors = sim_successors 1261 job.exception_info = exception_info 1262 1263 # For debugging purposes! 1264 job.successor_status = {} 1265 1266 def _get_successors(self, job): 1267 """ 1268 Get a collection of successors out of the current job. 1269 1270 :param CFGJob job: The CFGJob instance. 1271 :return: A collection of successors. 1272 :rtype: list 1273 """ 1274 1275 addr = job.addr 1276 sim_successors = job.sim_successors 1277 cfg_node = job.cfg_node 1278 input_state = job.state 1279 func_addr = job.func_addr 1280 1281 # check step limit 1282 if self._max_steps is not None: 1283 depth = cfg_node.depth 1284 if depth >= self._max_steps: 1285 return [ ] 1286 1287 successors = [ ] 1288 is_indirect_jump = sim_successors.sort == 'IRSB' and self._is_indirect_jump(cfg_node, sim_successors) 1289 indirect_jump_resolved_by_resolvers = False 1290 1291 if is_indirect_jump and self._resolve_indirect_jumps: 1292 # Try to resolve indirect jumps 1293 irsb = input_state.block(cross_insn_opt=False).vex 1294 1295 resolved, resolved_targets, ij = self._indirect_jump_encountered(addr, cfg_node, irsb, func_addr, 1296 stmt_idx=DEFAULT_STATEMENT) 1297 if resolved: 1298 successors = self._convert_indirect_jump_targets_to_states(job, resolved_targets) 1299 if ij: 1300 self._indirect_jump_resolved(ij, ij.addr, None, resolved_targets) 1301 else: 1302 # Try to resolve this indirect jump using heavier approaches 1303 resolved_targets = self._process_one_indirect_jump(ij) 1304 successors = self._convert_indirect_jump_targets_to_states(job, resolved_targets) 1305 1306 if successors: 1307 indirect_jump_resolved_by_resolvers = True 1308 else: 1309 # It's unresolved. Add it to the wait list (but apparently we don't have any better way to resolve it 1310 # right now). 1311 self._indirect_jumps_to_resolve.add(ij) 1312 1313 if not successors: 1314 # Get all successors of this block 1315 successors = (sim_successors.flat_successors + sim_successors.unsat_successors) \ 1316 if addr not in self._avoid_runs else [] 1317 1318 # Post-process successors 1319 successors, job.extra_info = self._post_process_successors(input_state, sim_successors, successors) 1320 1321 all_successors = successors + sim_successors.unconstrained_successors 1322 1323 # make sure FakeRets are at the last 1324 all_successors = [ suc for suc in all_successors if suc.history.jumpkind != 'Ijk_FakeRet' ] + \ 1325 [ suc for suc in all_successors if suc.history.jumpkind == 'Ijk_FakeRet' ] 1326 1327 if self._keep_state: 1328 cfg_node.final_states = all_successors[::] 1329 1330 if is_indirect_jump and not indirect_jump_resolved_by_resolvers: 1331 # For indirect jumps, filter successors that do not make sense 1332 successors = self._filter_insane_successors(successors) 1333 1334 successors = self._try_resolving_indirect_jumps(sim_successors, 1335 cfg_node, 1336 func_addr, 1337 successors, 1338 job.exception_info, 1339 self._block_artifacts) 1340 # Remove all successors whose IP is symbolic 1341 successors = [ s for s in successors if not s.ip.symbolic ] 1342 1343 # Add additional edges supplied by the user 1344 successors = self._add_additional_edges(input_state, sim_successors, cfg_node, successors) 1345 1346 # if base graph is used, add successors implied from the graph 1347 if self._base_graph: 1348 basegraph_successor_addrs = set() 1349 for src_, dst_ in self._base_graph.edges(): 1350 if src_.addr == addr: 1351 basegraph_successor_addrs.add(dst_.addr) 1352 successor_addrs = {s.solver.eval(s.ip) for s in successors} 1353 extra_successor_addrs = basegraph_successor_addrs - successor_addrs 1354 1355 if all_successors: # make sure we have a base state to use 1356 base_state = all_successors[0] # TODO: for calls, we want to use the fake_ret state 1357 1358 for s_addr in extra_successor_addrs: 1359 # an extra target 1360 successor_state = base_state.copy() 1361 successor_state.ip = s_addr 1362 successors.append(successor_state) 1363 else: 1364 if extra_successor_addrs: 1365 l.error('CFGEmulated terminates at %#x although base graph provided more exits.', addr) 1366 1367 if not successors: 1368 # There is no way out :-( 1369 # Log it first 1370 self._push_unresolvable_run(addr) 1371 1372 if sim_successors.sort == 'SimProcedure' and isinstance(sim_successors.artifacts['procedure'], 1373 SIM_PROCEDURES["stubs"]["PathTerminator"]): 1374 # If there is no valid exit in this branch and it's not 1375 # intentional (e.g. caused by a SimProcedure that does not 1376 # do_return) , we should make it return to its call-site. However, 1377 # we don't want to use its state anymore as it might be corrupted. 1378 # Just create an edge in the graph. 1379 return_target = job.call_stack.current_return_target 1380 if return_target is not None: 1381 new_call_stack = job.call_stack_copy() 1382 return_target_key = self._generate_block_id( 1383 new_call_stack.stack_suffix(self.context_sensitivity_level), 1384 return_target, 1385 False 1386 ) # You can never return to a syscall 1387 1388 if not cfg_node.instruction_addrs: 1389 ret_ins_addr = None 1390 else: 1391 if self.project.arch.branch_delay_slot: 1392 if len(cfg_node.instruction_addrs) > 1: 1393 ret_ins_addr = cfg_node.instruction_addrs[-2] 1394 else: 1395 l.error('At %s: expecting more than one instruction. Only got one.', cfg_node) 1396 ret_ins_addr = None 1397 else: 1398 ret_ins_addr = cfg_node.instruction_addrs[-1] 1399 1400 # Things might be a bit difficult here. _graph_add_edge() requires both nodes to exist, but here 1401 # the return target node may not exist yet. If that's the case, we will put it into a "delayed edge 1402 # list", and add this edge later when the return target CFGNode is created. 1403 if return_target_key in self._nodes: 1404 self._graph_add_edge(job.block_id, return_target_key, jumpkind='Ijk_Ret', 1405 stmt_id=DEFAULT_STATEMENT, ins_addr=ret_ins_addr) 1406 else: 1407 self._pending_edges[return_target_key].append((job.block_id, return_target_key, 1408 { 1409 'jumpkind': 'Ijk_Ret', 1410 'stmt_id': DEFAULT_STATEMENT, 1411 'ins_addr': ret_ins_addr, 1412 } 1413 ) 1414 ) 1415 1416 else: 1417 # There are no successors, but we still want to update the function graph 1418 artifacts = job.sim_successors.artifacts 1419 if 'irsb' in artifacts and 'insn_addrs' in artifacts and artifacts['insn_addrs']: 1420 the_irsb = artifacts['irsb'] 1421 insn_addrs = artifacts['insn_addrs'] 1422 self._handle_job_without_successors(job, the_irsb, insn_addrs) 1423 1424 # TODO: replace it with a DDG-based function IO analysis 1425 # handle all actions 1426 if successors: 1427 self._handle_actions(successors[0], 1428 sim_successors, 1429 job.current_function, 1430 job.current_stack_pointer, 1431 set(), 1432 ) 1433 1434 return successors 1435 1436 def _post_job_handling(self, job, new_jobs, successors): 1437 """ 1438 1439 :param CFGJob job: 1440 :param successors: 1441 :return: 1442 """ 1443 1444 # Finally, post-process CFG Node and log the return target 1445 if job.extra_info: 1446 if job.extra_info['is_call_jump'] and job.extra_info['return_target'] is not None: 1447 job.cfg_node.return_target = job.extra_info['return_target'] 1448 1449 # Debugging output if needed 1450 if l.level == logging.DEBUG: 1451 # Only in DEBUG mode do we process and output all those shit 1452 self._post_handle_job_debug(job, successors) 1453 1454 # SimInspect breakpoints support 1455 job.state._inspect('cfg_handle_job', BP_AFTER) 1456 1457 def _post_process_successors(self, input_state, sim_successors, successors): 1458 """ 1459 Filter the list of successors 1460 1461 :param SimState input_state: Input state. 1462 :param SimSuccessors sim_successors: The SimSuccessors instance. 1463 :param list successors: A list of successors generated after processing the current block. 1464 :return: A list of successors. 1465 :rtype: list 1466 """ 1467 1468 if sim_successors.sort == 'IRSB' and input_state.thumb: 1469 successors = self._arm_thumb_filter_jump_successors(sim_successors.artifacts['irsb'], 1470 successors, 1471 lambda state: state.scratch.ins_addr, 1472 lambda state: state.scratch.exit_stmt_idx, 1473 lambda state: state.history.jumpkind, 1474 ) 1475 1476 # If there is a call exit, we shouldn't put the default exit (which 1477 # is artificial) into the CFG. The exits will be Ijk_Call and 1478 # Ijk_FakeRet, and Ijk_Call always goes first 1479 extra_info = {'is_call_jump': False, 1480 'call_target': None, 1481 'return_target': None, 1482 'last_call_exit_target': None, 1483 'skip_fakeret': False, 1484 } 1485 1486 # Post-process jumpkind before touching all_successors 1487 for suc in sim_successors.all_successors: # we process all successors here to include potential unsat successors 1488 suc_jumpkind = suc.history.jumpkind 1489 if self._is_call_jumpkind(suc_jumpkind): 1490 extra_info['is_call_jump'] = True 1491 break 1492 1493 return successors, extra_info 1494 1495 def _post_handle_job_debug(self, job, successors): 1496 """ 1497 Post job handling: print debugging information regarding the current job. 1498 1499 :param CFGJob job: The current CFGJob instance. 1500 :param list successors: All successors of the analysis job. 1501 :return: None 1502 """ 1503 1504 sim_successors = job.sim_successors 1505 call_stack_suffix = job.call_stack_suffix 1506 extra_info = job.extra_info 1507 successor_status = job.successor_status 1508 1509 func = self.project.loader.find_symbol(job.func_addr) 1510 obj = self.project.loader.find_object_containing(job.addr) 1511 function_name = func.name if func is not None else None 1512 module_name = obj.provides if obj is not None else None 1513 1514 node = self.model.get_node(job.block_id) 1515 depth_str = "(D:%s)" % node.depth if node.depth is not None else "" 1516 1517 l.debug("%s [%#x%s | %s]", sim_successors.description, sim_successors.addr, depth_str, 1518 "->".join([hex(i) for i in call_stack_suffix if i is not None]) 1519 ) 1520 l.debug("(Function %s of binary %s)", function_name, module_name) 1521 l.debug("| Call jump: %s", extra_info['is_call_jump'] if extra_info is not None else 'unknown') 1522 1523 for suc in successors: 1524 jumpkind = suc.history.jumpkind 1525 if jumpkind == "Ijk_FakeRet": 1526 exit_type_str = "Simulated Ret" 1527 else: 1528 exit_type_str = "-" 1529 try: 1530 l.debug("| target: %#x %s [%s] %s", suc.solver.eval_one(suc.ip), successor_status[suc], 1531 exit_type_str, jumpkind) 1532 except (SimValueError, SimSolverModeError): 1533 l.debug("| target cannot be concretized. %s [%s] %s", successor_status[suc], exit_type_str, 1534 jumpkind) 1535 l.debug("%d exits remaining, %d exits pending.", len(self._job_info_queue), len(self._pending_jobs)) 1536 l.debug("%d unique basic blocks are analyzed so far.", len(self._analyzed_addrs)) 1537 1538 def _iteratively_clean_pending_exits(self): 1539 """ 1540 Iteratively update the completed functions set, analyze whether each function returns or not, and remove 1541 pending exits if the callee function does not return. We do this iteratively so that we come to a fixed point 1542 in the end. In most cases, the number of outer iteration equals to the maximum levels of wrapped functions 1543 whose "returningness" is determined by the very last function it calls. 1544 1545 :return: None 1546 """ 1547 1548 while True: 1549 # did we finish analyzing any function? 1550 # fill in self._completed_functions 1551 self._make_completed_functions() 1552 1553 if self._pending_jobs: 1554 # There are no more remaining jobs, but only pending jobs left. Each pending job corresponds to 1555 # a previous job that does not return properly. 1556 # Now it's a good time to analyze each function (that we have so far) and determine if it is a) 1557 # returning, b) not returning, or c) unknown. For those functions that are definitely not returning, 1558 # remove the corresponding pending exits from `pending_jobs` array. Perform this procedure iteratively 1559 # until no new not-returning functions appear. Then we pick a pending exit based on the following 1560 # priorities: 1561 # - Job pended by a returning function 1562 # - Job pended by an unknown function 1563 1564 new_changes = self._iteratively_analyze_function_features() 1565 functions_do_not_return = new_changes['functions_do_not_return'] 1566 1567 self._update_function_callsites(functions_do_not_return) 1568 1569 if not self._clean_pending_exits(): 1570 # no more pending exits are removed. we are good to go! 1571 break 1572 else: 1573 break 1574 1575 def _clean_pending_exits(self): 1576 """ 1577 Remove those pending exits if: 1578 a) they are the return exits of non-returning SimProcedures 1579 b) they are the return exits of non-returning syscalls 1580 c) they are the return exits of non-returning functions 1581 1582 :return: True if any pending exits are removed, False otherwise 1583 :rtype: bool 1584 """ 1585 1586 pending_exits_to_remove = [ ] 1587 1588 for block_id, jobs in self._pending_jobs.items(): 1589 for pe in jobs: 1590 if pe.returning_source is None: 1591 # The original call failed. This pending exit must be followed. 1592 continue 1593 1594 func = self.kb.functions.function(pe.returning_source) 1595 if func is None: 1596 # Why does it happen? 1597 l.warning("An expected function at %s is not found. Please report it to Fish.", 1598 hex(pe.returning_source) if pe.returning_source is not None else 'None') 1599 continue 1600 1601 if func.returning is False: 1602 # Oops, it's not returning 1603 # Remove this pending exit 1604 pending_exits_to_remove.append(block_id) 1605 1606 # We want to mark that call as not returning in the current function 1607 current_function_addr = self._block_id_current_func_addr(block_id) 1608 if current_function_addr is not None: 1609 current_function = self.kb.functions.function(current_function_addr) 1610 if current_function is not None: 1611 call_site_addr = self._block_id_addr(pe.src_block_id) 1612 current_function._call_sites[call_site_addr] = (func.addr, None) 1613 else: 1614 l.warning('An expected function at %#x is not found. Please report it to Fish.', 1615 current_function_addr 1616 ) 1617 1618 for block_id in pending_exits_to_remove: 1619 l.debug('Removing all pending exits to %#x since the target function %#x does not return', 1620 self._block_id_addr(block_id), 1621 next(iter(self._pending_jobs[block_id])).returning_source, 1622 ) 1623 1624 for to_remove in self._pending_jobs[block_id]: 1625 self._deregister_analysis_job(to_remove.caller_func_addr, to_remove) 1626 1627 del self._pending_jobs[block_id] 1628 1629 if pending_exits_to_remove: 1630 return True 1631 return False 1632 1633 # Successor handling 1634 1635 def _pre_handle_successor_state(self, extra_info, jumpkind, target_addr): 1636 """ 1637 1638 :return: None 1639 """ 1640 1641 # Fill up extra_info 1642 if extra_info['is_call_jump'] and self._is_call_jumpkind(jumpkind): 1643 extra_info['call_target'] = target_addr 1644 1645 if (extra_info['is_call_jump'] and 1646 jumpkind == 'Ijk_FakeRet'): 1647 extra_info['return_target'] = target_addr 1648 1649 if jumpkind == "Ijk_Call": 1650 extra_info['last_call_exit_target'] = target_addr 1651 1652 def _handle_successor(self, job, successor, successors): 1653 """ 1654 Returns a new CFGJob instance for further analysis, or None if there is no immediate state to perform the 1655 analysis on. 1656 1657 :param CFGJob job: The current job. 1658 """ 1659 1660 state = successor 1661 all_successor_states = successors 1662 addr = job.addr 1663 1664 # The PathWrapper instance to return 1665 pw = None 1666 1667 job.successor_status[state] = "" 1668 1669 new_state = state.copy() 1670 suc_jumpkind = state.history.jumpkind 1671 suc_exit_stmt_idx = state.scratch.exit_stmt_idx 1672 suc_exit_ins_addr = state.scratch.exit_ins_addr 1673 1674 if suc_jumpkind in {'Ijk_EmWarn', 'Ijk_NoDecode', 'Ijk_MapFail', 'Ijk_NoRedir', 1675 'Ijk_SigTRAP', 'Ijk_SigSEGV', 'Ijk_ClientReq'}: 1676 # Ignore SimExits that are of these jumpkinds 1677 job.successor_status[state] = "Skipped" 1678 return [ ] 1679 1680 call_target = job.extra_info['call_target'] 1681 if suc_jumpkind == "Ijk_FakeRet" and call_target is not None: 1682 # if the call points to a SimProcedure that doesn't return, we don't follow the fakeret anymore 1683 if self.project.is_hooked(call_target): 1684 sim_proc = self.project._sim_procedures[call_target] 1685 if sim_proc.NO_RET: 1686 return [ ] 1687 1688 # Get target address 1689 try: 1690 target_addr = state.solver.eval_one(state.ip) 1691 except (SimValueError, SimSolverModeError): 1692 # It cannot be concretized currently. Maybe we can handle it later, maybe it just cannot be concretized 1693 target_addr = None 1694 if suc_jumpkind == "Ijk_Ret": 1695 target_addr = job.call_stack.current_return_target 1696 if target_addr is not None: 1697 new_state.ip = new_state.solver.BVV(target_addr, new_state.arch.bits) 1698 1699 if target_addr is None: 1700 # Unlucky... 1701 return [ ] 1702 1703 if state.thumb: 1704 # Make sure addresses are always odd. It is important to encode this information in the address for the 1705 # time being. 1706 target_addr |= 1 1707 1708 # see if the target successor is in our whitelist 1709 if self._address_whitelist is not None: 1710 if target_addr not in self._address_whitelist: 1711 l.debug("Successor %#x is not in the address whitelist. Skip.", target_addr) 1712 return [ ] 1713 1714 # see if this edge is in the base graph 1715 if self._base_graph is not None: 1716 # TODO: make it more efficient. the current implementation is half-assed and extremely slow 1717 for src_, dst_ in self._base_graph.edges(): 1718 if src_.addr == addr and dst_.addr == target_addr: 1719 break 1720 else: 1721 # not found 1722 l.debug("Edge (%#x -> %#x) is not found in the base graph. Skip.", addr, target_addr) 1723 return [ ] 1724 1725 # Fix target_addr for syscalls 1726 if suc_jumpkind.startswith("Ijk_Sys"): 1727 syscall_proc = self.project.simos.syscall(new_state) 1728 if syscall_proc is not None: 1729 target_addr = syscall_proc.addr 1730 1731 self._pre_handle_successor_state(job.extra_info, suc_jumpkind, target_addr) 1732 1733 if suc_jumpkind == "Ijk_FakeRet": 1734 if target_addr == job.extra_info['last_call_exit_target']: 1735 l.debug("... skipping a fake return exit that has the same target with its call exit.") 1736 job.successor_status[state] = "Skipped" 1737 return [ ] 1738 1739 if job.extra_info['skip_fakeret']: 1740 l.debug('... skipping a fake return exit since the function it\'s calling doesn\'t return') 1741 job.successor_status[state] = "Skipped - non-returning function 0x%x" % job.extra_info['call_target'] 1742 return [ ] 1743 1744 # TODO: Make it optional 1745 if (suc_jumpkind == 'Ijk_Ret' and 1746 self._call_depth is not None and 1747 len(job.call_stack) <= 1 1748 ): 1749 # We cannot continue anymore since this is the end of the function where we started tracing 1750 l.debug('... reaching the end of the starting function, skip.') 1751 job.successor_status[state] = "Skipped - reaching the end of the starting function" 1752 return [ ] 1753 1754 # Create the new call stack of target block 1755 new_call_stack = self._create_new_call_stack(addr, all_successor_states, job, target_addr, 1756 suc_jumpkind) 1757 # Create the callstack suffix 1758 new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level) 1759 # Tuple that will be used to index this exit 1760 new_tpl = self._generate_block_id(new_call_stack_suffix, target_addr, suc_jumpkind.startswith('Ijk_Sys')) 1761 1762 # We might have changed the mode for this basic block 1763 # before. Make sure it is still running in 'fastpath' mode 1764 self._reset_state_mode(new_state, 'fastpath') 1765 1766 pw = CFGJob(target_addr, 1767 new_state, 1768 self._context_sensitivity_level, 1769 src_block_id=job.block_id, 1770 src_exit_stmt_idx=suc_exit_stmt_idx, 1771 src_ins_addr=suc_exit_ins_addr, 1772 call_stack=new_call_stack, 1773 jumpkind=suc_jumpkind, 1774 ) 1775 # Special case: If the binary has symbols and the target address is a function, but for some reason (e.g., 1776 # a tail-call optimization) the CallStack's function address is still the old function address, we will have to 1777 # overwrite it here. 1778 if not self._is_call_jumpkind(pw.jumpkind): 1779 target_symbol = self.project.loader.find_symbol(target_addr) 1780 if target_symbol and target_symbol.is_function: 1781 # Force update the function address 1782 pw.func_addr = target_addr 1783 1784 # Generate new exits 1785 if suc_jumpkind == "Ijk_Ret": 1786 # This is the real return exit 1787 job.successor_status[state] = "Appended" 1788 1789 elif suc_jumpkind == "Ijk_FakeRet": 1790 # This is the default "fake" retn that generated at each 1791 # call. Save them first, but don't process them right 1792 # away 1793 # st = self.project._simos.prepare_call_state(new_state, initial_state=saved_state) 1794 st = new_state 1795 self._reset_state_mode(st, 'fastpath') 1796 1797 pw = None # clear the job 1798 pe = PendingJob(job.func_addr, 1799 job.extra_info['call_target'], 1800 st, 1801 job.block_id, 1802 suc_exit_stmt_idx, 1803 suc_exit_ins_addr, 1804 new_call_stack 1805 ) 1806 self._pending_jobs[new_tpl].append(pe) 1807 self._register_analysis_job(pe.caller_func_addr, pe) 1808 job.successor_status[state] = "Pended" 1809 1810 elif self._traced_addrs[new_call_stack_suffix][target_addr] >= 1 and suc_jumpkind == "Ijk_Ret": 1811 # This is a corner case for the f****** ARM instruction 1812 # like 1813 # BLEQ <address> 1814 # If we have analyzed the boring exit before returning from that called address, we will lose the link 1815 # between the last block of the function being called and the basic block it returns to. We cannot 1816 # reanalyze the basic block as we are not flow-sensitive, but we can still record the connection and make 1817 # for it afterwards. 1818 pass 1819 1820 else: 1821 job.successor_status[state] = "Appended" 1822 1823 if job.extra_info['is_call_jump'] and job.extra_info['call_target'] in self._non_returning_functions: 1824 job.extra_info['skip_fakeret'] = True 1825 1826 if not pw: 1827 return [ ] 1828 1829 if self._base_graph is not None: 1830 # remove all existing jobs that has the same block ID 1831 if next((en for en in self.jobs if en.block_id == pw.block_id), None): 1832 # TODO: this is very hackish. Reimplement this logic later 1833 self._job_info_queue = [entry for entry in self._job_info_queue if entry.job.block_id != pw.block_id] 1834 1835 # register the job 1836 self._register_analysis_job(pw.func_addr, pw) 1837 1838 return [ pw ] 1839 1840 def _handle_job_without_successors(self, job, irsb, insn_addrs): 1841 """ 1842 A block without successors should still be handled so it can be added to the function graph correctly. 1843 1844 :param CFGJob job: The current job that do not have any successor. 1845 :param IRSB irsb: The related IRSB. 1846 :param insn_addrs: A list of instruction addresses of this IRSB. 1847 :return: None 1848 """ 1849 1850 # it's not an empty block 1851 1852 # handle all conditional exits 1853 ins_addr = job.addr 1854 for stmt_idx, stmt in enumerate(irsb.statements): 1855 if type(stmt) is pyvex.IRStmt.IMark: 1856 ins_addr = stmt.addr + stmt.delta 1857 elif type(stmt) is pyvex.IRStmt.Exit: 1858 successor_jumpkind = stmt.jk 1859 self._update_function_transition_graph( 1860 job.block_id, None, 1861 jumpkind = successor_jumpkind, 1862 ins_addr=ins_addr, 1863 stmt_idx=stmt_idx, 1864 ) 1865 1866 # handle the default exit 1867 successor_jumpkind = irsb.jumpkind 1868 successor_last_ins_addr = insn_addrs[-1] 1869 self._update_function_transition_graph(job.block_id, None, 1870 jumpkind=successor_jumpkind, 1871 ins_addr=successor_last_ins_addr, 1872 stmt_idx=DEFAULT_STATEMENT, 1873 ) 1874 1875 # SimAction handling 1876 1877 def _handle_actions(self, state, current_run, func, sp_addr, accessed_registers): 1878 """ 1879 For a given state and current location of of execution, will update a function by adding the offets of 1880 appropriate actions to the stack variable or argument registers for the fnc. 1881 1882 :param SimState state: upcoming state. 1883 :param SimSuccessors current_run: possible result states. 1884 :param knowledge.Function func: current function. 1885 :param int sp_addr: stack pointer address. 1886 :param set accessed_registers: set of before accessed registers. 1887 """ 1888 se = state.solver 1889 1890 if func is not None and sp_addr is not None: 1891 1892 # Fix the stack pointer (for example, skip the return address on the stack) 1893 new_sp_addr = sp_addr + self.project.arch.call_sp_fix 1894 1895 actions = [a for a in state.history.recent_actions if a.bbl_addr == current_run.addr] 1896 1897 for a in actions: 1898 if a.type == "mem" and a.action == "read": 1899 try: 1900 addr = se.eval_one(a.addr.ast, default=0) 1901 except (claripy.ClaripyError, SimSolverModeError): 1902 continue 1903 if (self.project.arch.call_pushes_ret and addr >= new_sp_addr) or \ 1904 (not self.project.arch.call_pushes_ret and addr >= new_sp_addr): 1905 # TODO: What if a variable locates higher than the stack is modified as well? We probably want 1906 # TODO: to make sure the accessing address falls in the range of stack 1907 offset = addr - new_sp_addr 1908 func._add_argument_stack_variable(offset) 1909 elif a.type == "reg": 1910 offset = a.offset 1911 if a.action == "read" and offset not in accessed_registers: 1912 func._add_argument_register(offset) 1913 elif a.action == "write": 1914 accessed_registers.add(offset) 1915 else: 1916 l.error( 1917 "handle_actions: Function not found, or stack pointer is None. It might indicates unbalanced stack.") 1918 1919 # Private utils - DiGraph construction and manipulation 1920 1921 def _graph_get_node(self, node_key, terminator_for_nonexistent_node=False): 1922 """ 1923 1924 :param node_key: 1925 :return: 1926 """ 1927 1928 if node_key not in self._nodes: 1929 if not terminator_for_nonexistent_node: 1930 return None 1931 # Generate a PathTerminator node 1932 addr = self._block_id_addr(node_key) 1933 func_addr = self._block_id_current_func_addr(node_key) 1934 if func_addr is None: 1935 # We'll have to use the current block address instead 1936 # TODO: Is it really OK? 1937 func_addr = self._block_id_addr(node_key) 1938 1939 is_thumb = isinstance(self.project.arch, ArchARM) and addr % 2 == 1 1940 1941 pt = CFGENode(self._block_id_addr(node_key), 1942 None, 1943 self.model, 1944 input_state=None, 1945 simprocedure_name="PathTerminator", 1946 function_address=func_addr, 1947 callstack_key=self._block_id_callstack_key(node_key), 1948 thumb=is_thumb 1949 ) 1950 if self._keep_state: 1951 # We don't have an input state available for it (otherwise we won't have to create a 1952 # PathTerminator). This is just a trick to make get_any_irsb() happy. 1953 pt.input_state = self.project.factory.entry_state() 1954 pt.input_state.ip = pt.addr 1955 self._nodes[node_key] = pt 1956 self._nodes_by_addr[pt.addr].append(pt) 1957 1958 if is_thumb: 1959 self._thumb_addrs.add(addr) 1960 self._thumb_addrs.add(addr - 1) 1961 1962 l.debug("Block ID %s does not exist. Create a PathTerminator instead.", 1963 self._block_id_repr(node_key)) 1964 1965 return self._nodes[node_key] 1966 1967 def _graph_add_edge(self, src_node_key, dst_node_key, **kwargs): 1968 """ 1969 1970 :param src_node_key: 1971 :param dst_node_key: 1972 :param jumpkind: 1973 :param exit_stmt_idx: 1974 :return: 1975 """ 1976 1977 dst_node = self._graph_get_node(dst_node_key, terminator_for_nonexistent_node=True) 1978 1979 if src_node_key is None: 1980 self.graph.add_node(dst_node) 1981 1982 else: 1983 src_node = self._graph_get_node(src_node_key, terminator_for_nonexistent_node=True) 1984 self.graph.add_edge(src_node, dst_node, **kwargs) 1985 1986 def _update_function_transition_graph(self, src_node_key, dst_node_key, jumpkind='Ijk_Boring', ins_addr=None, 1987 stmt_idx=None, confirmed=None): 1988 """ 1989 Update transition graphs of functions in function manager based on information passed in. 1990 1991 :param src_node_key: Node key of the source CFGNode. Might be None. 1992 :param dst_node: Node key of the destination CFGNode. Might be None. 1993 :param str jumpkind: Jump kind of this transition. 1994 :param int ret_addr: The theoretical return address for calls. 1995 :param int or None ins_addr: Address of the instruction where this transition is made. 1996 :param int or None stmt_idx: ID of the statement where this transition is made. 1997 :param bool or None confirmed: Whether this call transition has been confirmed or not. 1998 :return: None 1999 """ 2000 2001 if dst_node_key is not None: 2002 dst_node = self._graph_get_node(dst_node_key, terminator_for_nonexistent_node=True) 2003 dst_node_addr = dst_node.addr 2004 dst_codenode = dst_node.to_codenode() 2005 dst_node_func_addr = dst_node.function_address 2006 else: 2007 dst_node = None 2008 dst_node_addr = None 2009 dst_codenode = None 2010 dst_node_func_addr = None 2011 2012 if src_node_key is None: 2013 if dst_node is None: 2014 raise ValueError("Either src_node_key or dst_node_key must be specified.") 2015 self.kb.functions.function(dst_node.function_address, create=True)._register_nodes(True, 2016 dst_codenode 2017 ) 2018 return 2019 2020 src_node = self._graph_get_node(src_node_key, terminator_for_nonexistent_node=True) 2021 2022 # Update the transition graph of current function 2023 if jumpkind == "Ijk_Call": 2024 ret_addr = src_node.return_target 2025 ret_node = self.kb.functions.function( 2026 src_node.function_address, 2027 create=True 2028 )._get_block(ret_addr).codenode if ret_addr else None 2029 2030 self.kb.functions._add_call_to( 2031 function_addr=src_node.function_address, 2032 from_node=src_node.to_codenode(), 2033 to_addr=dst_node_addr, 2034 retn_node=ret_node, 2035 syscall=False, 2036 ins_addr=ins_addr, 2037 stmt_idx=stmt_idx, 2038 ) 2039 2040 if jumpkind.startswith('Ijk_Sys'): 2041 2042 self.kb.functions._add_call_to( 2043 function_addr=src_node.function_address, 2044 from_node=src_node.to_codenode(), 2045 to_addr=dst_node_addr, 2046 retn_node=src_node.to_codenode(), # For syscalls, they are returning to the address of themselves 2047 syscall=True, 2048 ins_addr=ins_addr, 2049 stmt_idx=stmt_idx, 2050 ) 2051 2052 elif jumpkind == 'Ijk_Ret': 2053 # Create a return site for current function 2054 self.kb.functions._add_return_from( 2055 function_addr=src_node.function_address, 2056 from_node=src_node.to_codenode(), 2057 to_node=dst_codenode, 2058 ) 2059 2060 if dst_node is not None: 2061 # Create a returning edge in the caller function 2062 self.kb.functions._add_return_from_call( 2063 function_addr=dst_node_func_addr, 2064 src_function_addr=src_node.function_address, 2065 to_node=dst_codenode, 2066 ) 2067 2068 elif jumpkind == 'Ijk_FakeRet': 2069 self.kb.functions._add_fakeret_to( 2070 function_addr=src_node.function_address, 2071 from_node=src_node.to_codenode(), 2072 to_node=dst_codenode, 2073 confirmed=confirmed, 2074 ) 2075 2076 elif jumpkind in ('Ijk_Boring', 'Ijk_InvalICache'): 2077 2078 src_obj = self.project.loader.find_object_containing(src_node.addr) 2079 dest_obj = self.project.loader.find_object_containing(dst_node.addr) if dst_node is not None else None 2080 2081 if src_obj is dest_obj: 2082 # Jump/branch within the same object. Might be an outside jump. 2083 to_outside = src_node.function_address != dst_node_func_addr 2084 else: 2085 # Jump/branch between different objects. Must be an outside jump. 2086 to_outside = True 2087 2088 if not to_outside: 2089 self.kb.functions._add_transition_to( 2090 function_addr=src_node.function_address, 2091 from_node=src_node.to_codenode(), 2092 to_node=dst_codenode, 2093 ins_addr=ins_addr, 2094 stmt_idx=stmt_idx, 2095 ) 2096 2097 else: 2098 self.kb.functions._add_outside_transition_to( 2099 function_addr=src_node.function_address, 2100 from_node=src_node.to_codenode(), 2101 to_node=dst_codenode, 2102 to_function_addr=dst_node_func_addr, 2103 ins_addr=ins_addr, 2104 stmt_idx=stmt_idx, 2105 ) 2106 2107 def _update_function_callsites(self, noreturns): 2108 """ 2109 Update the callsites of functions (remove return targets) that are calling functions that are just deemed not 2110 returning. 2111 2112 :param iterable func_addrs: A collection of functions for newly-recovered non-returning functions. 2113 :return: None 2114 """ 2115 2116 for callee_func in noreturns: 2117 # consult the callgraph to find callers of each function 2118 if callee_func.addr not in self.functions.callgraph: 2119 continue 2120 caller_addrs = self.functions.callgraph.predecessors(callee_func.addr) 2121 for caller_addr in caller_addrs: 2122 caller = self.functions[caller_addr] 2123 if callee_func not in caller.transition_graph: 2124 continue 2125 callsites = caller.transition_graph.predecessors(callee_func) 2126 for callsite in callsites: 2127 caller._add_call_site(callsite.addr, callee_func.addr, None) 2128 2129 def _add_additional_edges(self, input_state, sim_successors, cfg_node, successors): 2130 """ 2131 2132 :return: 2133 """ 2134 2135 # If we have additional edges for this block, add them in 2136 addr = cfg_node.addr 2137 2138 if addr in self._additional_edges: 2139 dests = self._additional_edges[addr] 2140 for dst in dests: 2141 if sim_successors.sort == 'IRSB': 2142 base_state = sim_successors.all_successors[0].copy() 2143 else: 2144 if successors: 2145 # We try to use the first successor. 2146 base_state = successors[0].copy() 2147 else: 2148 # The SimProcedure doesn't have any successor (e.g. it's a PathTerminator) 2149 # We'll use its input state instead 2150 base_state = input_state 2151 base_state.ip = dst 2152 # TODO: Allow for sp adjustments 2153 successors.append(base_state) 2154 l.debug("Additional jump target %#x for block %s is appended.", dst, sim_successors.description) 2155 2156 return successors 2157 2158 def _filter_insane_successors(self, successors): 2159 """ 2160 Throw away all successors whose target doesn't make sense 2161 2162 This method is called after we resolve an indirect jump using an unreliable method (like, not through one of 2163 the indirect jump resolvers, but through either pure concrete execution or backward slicing) to filter out the 2164 obviously incorrect successors. 2165 2166 :param list successors: A collection of successors. 2167 :return: A filtered list of successors 2168 :rtype: list 2169 """ 2170 2171 old_successors = successors[::] 2172 successors = [ ] 2173 for i, suc in enumerate(old_successors): 2174 if suc.solver.symbolic(suc.ip): 2175 # It's symbolic. Take it, and hopefully we can resolve it later 2176 successors.append(suc) 2177 2178 else: 2179 ip_int = suc.solver.eval_one(suc.ip) 2180 2181 if self._is_address_executable(ip_int) or \ 2182 self.project.is_hooked(ip_int) or \ 2183 self.project.simos.is_syscall_addr(ip_int): 2184 successors.append(suc) 2185 else: 2186 l.debug('An obviously incorrect successor %d/%d (%#x) is ditched', 2187 i + 1, 2188 len(old_successors), 2189 ip_int 2190 ) 2191 2192 return successors 2193 2194 def _remove_non_return_edges(self): 2195 """ 2196 Remove those return_from_call edges that actually do not return due to 2197 calling some not-returning functions. 2198 :return: None 2199 """ 2200 for func in self.kb.functions.values(): 2201 graph = func.transition_graph 2202 all_return_edges = [(u, v) for (u, v, data) in graph.edges(data=True) if data['type'] == 'return_from_call'] 2203 for return_from_call_edge in all_return_edges: 2204 callsite_block_addr, return_to_addr = return_from_call_edge 2205 call_func_addr = func.get_call_target(callsite_block_addr) 2206 if call_func_addr is None: 2207 continue 2208 2209 call_func = self.kb.functions.function(call_func_addr) 2210 if call_func is None: 2211 # Weird... 2212 continue 2213 2214 if call_func.returning is False: 2215 # Remove that edge! 2216 graph.remove_edge(call_func_addr, return_to_addr) 2217 # Remove the edge in CFG 2218 nodes = self.get_all_nodes(callsite_block_addr) 2219 for n in nodes: 2220 successors = self.get_successors_and_jumpkind(n, excluding_fakeret=False) 2221 for successor, jumpkind in successors: 2222 if jumpkind == 'Ijk_FakeRet' and successor.addr == return_to_addr: 2223 self.remove_edge(n, successor) 2224 2225 # Remove all dangling nodes 2226 # wcc = list(networkx.weakly_connected_components(graph)) 2227 # for nodes in wcc: 2228 # if func.startpoint not in nodes: 2229 # graph.remove_nodes_from(nodes) 2230 2231 # Private methods - resolving indirect jumps 2232 2233 @staticmethod 2234 def _convert_indirect_jump_targets_to_states(job, indirect_jump_targets): 2235 """ 2236 Convert each concrete indirect jump target into a SimState. If there are non-zero successors and the original 2237 jumpkind is a call, we also generate a fake-ret successor. 2238 2239 :param job: The CFGJob instance. 2240 :param indirect_jump_targets: A collection of concrete jump targets resolved from a indirect jump. 2241 :return: A list of SimStates. 2242 :rtype: list 2243 """ 2244 2245 first_successor = job.sim_successors.all_successors[0] 2246 successors = [ ] 2247 for t in indirect_jump_targets: 2248 # Insert new successors 2249 a = first_successor.copy() 2250 a.ip = t 2251 successors.append(a) 2252 # special case: if the indirect jump is in fact an indirect call, we should create a FakeRet successor 2253 if successors and first_successor.history.jumpkind == 'Ijk_Call': 2254 a = first_successor.copy() 2255 a.ip = job.cfg_node.addr + job.cfg_node.size 2256 a.history.jumpkind = 'Ijk_FakeRet' 2257 successors.append(a) 2258 return successors 2259 2260 def _try_resolving_indirect_jumps(self, sim_successors, cfg_node, func_addr, successors, exception_info, artifacts): 2261 """ 2262 Resolve indirect jumps specified by sim_successors.addr. 2263 2264 :param SimSuccessors sim_successors: The SimSuccessors instance. 2265 :param CFGNode cfg_node: The CFGNode instance. 2266 :param int func_addr: Current function address. 2267 :param list successors: A list of successors. 2268 :param tuple exception_info: The sys.exc_info() of the exception or None if none occurred. 2269 :param artifacts: A container of collected information. 2270 :return: Resolved successors 2271 :rtype: list 2272 """ 2273 2274 # Try to resolve indirect jumps with advanced backward slicing (if enabled) 2275 if sim_successors.sort == 'IRSB' and \ 2276 self._is_indirect_jump(cfg_node, sim_successors): 2277 l.debug('IRSB %#x has an indirect jump as its default exit', cfg_node.addr) 2278 2279 # We need input states to perform backward slicing 2280 if self._advanced_backward_slicing and self._keep_state: 2281 2282 # Optimization: make sure we only try to resolve an indirect jump if any of the following criteria holds 2283 # - It's a jump (Ijk_Boring), and its target is either fully symbolic, or its resolved target is within 2284 # the current binary 2285 # - It's a call (Ijk_Call), and its target is fully symbolic 2286 # TODO: This is very hackish, please refactor this part of code later 2287 should_resolve = True 2288 legit_successors = [suc for suc in successors if suc.history.jumpkind in ('Ijk_Boring', 'Ijk_InvalICache', 'Ijk_Call')] 2289 if legit_successors: 2290 legit_successor = legit_successors[0] 2291 if legit_successor.ip.symbolic: 2292 if not legit_successor.history.jumpkind == 'Ijk_Call': 2293 should_resolve = False 2294 else: 2295 if legit_successor.history.jumpkind == 'Ijk_Call': 2296 should_resolve = False 2297 else: 2298 concrete_target = legit_successor.solver.eval(legit_successor.ip) 2299 if not self.project.loader.find_object_containing( 2300 concrete_target) is self.project.loader.main_object: 2301 should_resolve = False 2302 2303 else: 2304 # No interesting successors... skip 2305 should_resolve = False 2306 2307 # TODO: Handle those successors 2308 if not should_resolve: 2309 l.debug("This might not be an indirect jump that has multiple targets. Skipped.") 2310 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2311 2312 else: 2313 more_successors = self._backward_slice_indirect(cfg_node, sim_successors, func_addr) 2314 2315 if more_successors: 2316 # Remove the symbolic successor 2317 # TODO: Now we are removing all symbolic successors. Is it possible 2318 # TODO: that there is more than one symbolic successor? 2319 all_successors = [suc for suc in successors if not suc.solver.symbolic(suc.ip)] 2320 # Insert new successors 2321 # We insert new successors in the beginning of all_successors list so that we don't break the 2322 # assumption that Ijk_FakeRet is always the last element in the list 2323 for suc_addr in more_successors: 2324 a = sim_successors.all_successors[0].copy() 2325 a.ip = suc_addr 2326 all_successors.insert(0, a) 2327 2328 l.debug('The indirect jump is successfully resolved.') 2329 self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, more_successors) 2330 2331 else: 2332 l.debug('Failed to resolve the indirect jump.') 2333 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2334 2335 else: 2336 if not successors: 2337 l.debug('Cannot resolve the indirect jump without advanced backward slicing enabled: %s', 2338 cfg_node) 2339 2340 # Try to find more successors if we failed to resolve the indirect jump before 2341 if exception_info is None and (cfg_node.is_simprocedure or self._is_indirect_jump(cfg_node, sim_successors)): 2342 has_call_jumps = any(suc_state.history.jumpkind == 'Ijk_Call' for suc_state in successors) 2343 if has_call_jumps: 2344 concrete_successors = [suc_state for suc_state in successors if 2345 suc_state.history.jumpkind != 'Ijk_FakeRet' and not suc_state.solver.symbolic( 2346 suc_state.ip)] 2347 else: 2348 concrete_successors = [suc_state for suc_state in successors if 2349 not suc_state.solver.symbolic(suc_state.ip)] 2350 symbolic_successors = [suc_state for suc_state in successors if suc_state.solver.symbolic(suc_state.ip)] 2351 2352 resolved = True if not symbolic_successors else False 2353 if symbolic_successors: 2354 for suc in symbolic_successors: 2355 if o.SYMBOLIC in suc.options: 2356 targets = suc.solver.eval_upto(suc.ip, 32) 2357 if len(targets) < 32: 2358 all_successors = [] 2359 resolved = True 2360 for t in targets: 2361 new_ex = suc.copy() 2362 new_ex.ip = suc.solver.BVV(t, suc.ip.size()) 2363 all_successors.append(new_ex) 2364 else: 2365 break 2366 2367 if not resolved and ( 2368 (symbolic_successors and not concrete_successors) or 2369 (not cfg_node.is_simprocedure and self._is_indirect_jump(cfg_node, sim_successors)) 2370 ): 2371 l.debug("%s has an indirect jump. See what we can do about it.", cfg_node) 2372 2373 if sim_successors.sort == 'SimProcedure' and \ 2374 sim_successors.artifacts['adds_exits']: 2375 # Skip those SimProcedures that don't create new SimExits 2376 l.debug('We got a SimProcedure %s in fastpath mode that creates new exits.', sim_successors.description) 2377 if self._enable_symbolic_back_traversal: 2378 successors = self._symbolically_back_traverse(sim_successors, artifacts, cfg_node) 2379 # mark jump as resolved if we got successors 2380 if successors: 2381 succ_addrs = [s.addr for s in successors] 2382 self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, succ_addrs) 2383 else: 2384 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2385 l.debug("Got %d concrete exits in symbolic mode.", len(successors)) 2386 else: 2387 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2388 # keep fake_rets 2389 successors = [s for s in successors if s.history.jumpkind == "Ijk_FakeRet"] 2390 2391 elif sim_successors.sort == 'IRSB'and \ 2392 any([ex.history.jumpkind != 'Ijk_Ret' for ex in successors]): 2393 # We cannot properly handle Return as that requires us start execution from the caller... 2394 l.debug("Try traversal backwards in symbolic mode on %s.", cfg_node) 2395 if self._enable_symbolic_back_traversal: 2396 successors = self._symbolically_back_traverse(sim_successors, artifacts, cfg_node) 2397 2398 # Remove successors whose IP doesn't make sense 2399 successors = [suc for suc in successors 2400 if self._is_address_executable(suc.solver.eval_one(suc.ip))] 2401 2402 # mark jump as resolved if we got successors 2403 if successors: 2404 succ_addrs = [s.addr for s in successors] 2405 self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, succ_addrs) 2406 else: 2407 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2408 l.debug('Got %d concrete exits in symbolic mode', len(successors)) 2409 else: 2410 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2411 successors = [] 2412 2413 elif successors and all([ex.history.jumpkind == 'Ijk_Ret' for ex in successors]): 2414 l.debug('All exits are returns (Ijk_Ret). It will be handled by pending exits.') 2415 2416 else: 2417 l.debug('Cannot resolve this indirect jump: %s', cfg_node) 2418 self.kb.unresolved_indirect_jumps.add(cfg_node.addr) 2419 2420 return successors 2421 2422 def _backward_slice_indirect(self, cfgnode, sim_successors, current_function_addr): 2423 """ 2424 Try to resolve an indirect jump by slicing backwards 2425 """ 2426 # TODO: make this a real indirect jump resolver under the new paradigm 2427 2428 irsb = sim_successors.artifacts['irsb'] # shorthand 2429 2430 l.debug("Resolving indirect jump at IRSB %s", irsb) 2431 2432 # Let's slice backwards from the end of this exit 2433 next_tmp = irsb.next.tmp 2434 stmt_id = [i for i, s in enumerate(irsb.statements) 2435 if isinstance(s, pyvex.IRStmt.WrTmp) and s.tmp == next_tmp][0] 2436 2437 cdg = self.project.analyses.CDG(cfg=self, fail_fast=self._fail_fast) 2438 ddg = self.project.analyses.DDG(cfg=self, start=current_function_addr, call_depth=0, fail_fast=self._fail_fast) 2439 2440 bc = self.project.analyses.BackwardSlice(self, 2441 cdg, 2442 ddg, 2443 targets=[(cfgnode, stmt_id)], 2444 same_function=True, 2445 fail_fast=self._fail_fast) 2446 taint_graph = bc.taint_graph 2447 # Find the correct taint 2448 next_nodes = [cl for cl in taint_graph.nodes() if cl.block_addr == sim_successors.addr] 2449 2450 if not next_nodes: 2451 l.error('The target exit is not included in the slice. Something is wrong') 2452 return [] 2453 2454 next_node = next_nodes[0] 2455 2456 # Get the weakly-connected subgraph that contains `next_node` 2457 all_subgraphs = ( networkx.induced_subgraph(taint_graph, nodes) for nodes in networkx.weakly_connected_components(taint_graph)) 2458 starts = set() 2459 for subgraph in all_subgraphs: 2460 if next_node in subgraph: 2461 # Make sure there is no symbolic read... 2462 # FIXME: This is an over-approximation. We should try to limit the starts more 2463 nodes = [n for n in subgraph.nodes() if subgraph.in_degree(n) == 0] 2464 for n in nodes: 2465 starts.add(n.block_addr) 2466 2467 # Execute the slice 2468 successing_addresses = set() 2469 annotated_cfg = bc.annotated_cfg() 2470 for start in starts: 2471 l.debug('Start symbolic execution at 0x%x on program slice.', start) 2472 # Get the state from our CFG 2473 node = self.get_any_node(start) 2474 if node is None: 2475 # Well, we have to live with an empty state 2476 base_state = self.project.factory.blank_state(addr=start) 2477 else: 2478 base_state = node.input_state.copy() 2479 base_state.set_mode('symbolic') 2480 base_state.ip = start 2481 2482 # Clear all initial taints (register values, memory values, etc.) 2483 initial_nodes = [n for n in bc.taint_graph.nodes() if bc.taint_graph.in_degree(n) == 0] 2484 for cl in initial_nodes: 2485 # Iterate in all actions of this node, and pick corresponding actions 2486 cfg_nodes = self.get_all_nodes(cl.block_addr) 2487 for n in cfg_nodes: 2488 if not n.final_states: 2489 continue 2490 actions = [ac for ac in n.final_states[0].history.recent_actions 2491 # Normally it's enough to only use the first final state 2492 if ac.bbl_addr == cl.block_addr and 2493 ac.stmt_idx == cl.stmt_idx 2494 ] 2495 for ac in actions: 2496 if not hasattr(ac, 'action'): 2497 continue 2498 if ac.action == 'read': 2499 if ac.type == 'mem': 2500 unconstrained_value = base_state.solver.Unconstrained('unconstrained', 2501 ac.size.ast * 8) 2502 base_state.memory.store(ac.addr, 2503 unconstrained_value, 2504 endness=self.project.arch.memory_endness) 2505 elif ac.type == 'reg': 2506 unconstrained_value = base_state.solver.Unconstrained('unconstrained', 2507 ac.size.ast * 8) 2508 base_state.registers.store(ac.offset, 2509 unconstrained_value, 2510 endness=self.project.arch.register_endness) 2511 2512 # Clear the constraints! 2513 base_state.release_plugin('solver') 2514 2515 # For speed concerns, we are limiting the timeout for z3 solver to 5 seconds 2516 base_state.solver._solver.timeout = 5000 2517 2518 sc = self.project.factory.simulation_manager(base_state) 2519 sc.use_technique(Slicecutor(annotated_cfg)) 2520 sc.use_technique(LoopSeer(bound=1)) 2521 sc.run() 2522 2523 if sc.cut or sc.deadended: 2524 all_deadended_states = sc.cut + sc.deadended 2525 for s in all_deadended_states: 2526 if s.addr == sim_successors.addr: 2527 # We want to get its successors 2528 succs = s.step() 2529 for succ in succs.flat_successors: 2530 successing_addresses.add(succ.addr) 2531 2532 else: 2533 l.debug("Cannot determine the exit. You need some better ways to recover the exits :-(") 2534 2535 l.debug('Resolution is done, and we have %d new successors.', len(successing_addresses)) 2536 2537 return list(successing_addresses) 2538 2539 def _symbolically_back_traverse(self, current_block, block_artifacts, cfg_node): 2540 """ 2541 Symbolically executes from ancestor nodes (2-5 times removed) finding paths of execution through the given 2542 CFGNode. 2543 2544 :param SimSuccessors current_block: SimSuccessor with address to attempt to navigate to. 2545 :param dict block_artifacts: Container of IRSB data - specifically used for known persistant register values. 2546 :param CFGNode cfg_node: Current node interested around. 2547 :returns: Double checked concrete successors. 2548 :rtype: List 2549 """ 2550 class register_protector: 2551 def __init__(self, reg_offset, info_collection): 2552 """ 2553 Class to overwrite registers. 2554 2555 :param int reg_offest: Register offset to overwrite from. 2556 :param dict info_collection: New register offsets to use (in container). 2557 """ 2558 self._reg_offset = reg_offset 2559 self._info_collection = info_collection 2560 2561 def write_persistent_register(self, state_): 2562 """ 2563 Writes over given registers from self._info_coollection (taken from block_artifacts) 2564 2565 :param SimSuccessors state_: state to update registers for 2566 """ 2567 if state_.inspect.address is None: 2568 l.error('state.inspect.address is None. It will be fixed by Yan later.') 2569 return 2570 2571 if state_.registers.load(self._reg_offset).symbolic: 2572 current_run = state_.inspect.address 2573 if current_run in self._info_collection and \ 2574 not state_.solver.symbolic(self._info_collection[current_run][self._reg_offset]): 2575 l.debug("Overwriting %s with %s", state_.registers.load(self._reg_offset), 2576 self._info_collection[current_run][self._reg_offset]) 2577 state_.registers.store( 2578 self._reg_offset, 2579 self._info_collection[current_run][self._reg_offset] 2580 ) 2581 2582 l.debug("Start back traversal from %s", current_block) 2583 2584 # Create a partial CFG first 2585 temp_cfg = networkx.DiGraph(self.graph) 2586 # Reverse it 2587 temp_cfg.reverse(copy=False) 2588 2589 path_length = 0 2590 concrete_exits = [] 2591 if cfg_node not in temp_cfg.nodes(): 2592 # TODO: Figure out why this is happening 2593 return concrete_exits 2594 2595 keep_running = True 2596 while not concrete_exits and path_length < 5 and keep_running: 2597 path_length += 1 2598 queue = [cfg_node] 2599 avoid = set() 2600 for _ in range(path_length): 2601 new_queue = [] 2602 for n in queue: 2603 successors = list(temp_cfg.successors(n)) 2604 for suc in successors: 2605 jk = temp_cfg.get_edge_data(n, suc)['jumpkind'] 2606 if jk != 'Ijk_Ret': 2607 # We don't want to trace into libraries 2608 predecessors = list(temp_cfg.predecessors(suc)) 2609 avoid |= {p.addr for p in predecessors if p is not n} 2610 new_queue.append(suc) 2611 queue = new_queue 2612 2613 if path_length <= 1: 2614 continue 2615 2616 for n in queue: 2617 # Start symbolic exploration from each block 2618 state = self.project.factory.blank_state(addr=n.addr, 2619 mode='symbolic', 2620 add_options={ 2621 o.DO_RET_EMULATION, 2622 o.CONSERVATIVE_READ_STRATEGY, 2623 } | o.resilience 2624 ) 2625 # Avoid concretization of any symbolic read address that is over a certain limit 2626 # TODO: test case is needed for this option 2627 2628 # Set initial values of persistent regs 2629 if n.addr in block_artifacts: 2630 for reg in state.arch.persistent_regs: 2631 state.registers.store(reg, block_artifacts[n.addr][reg]) 2632 for reg in state.arch.persistent_regs: 2633 reg_protector = register_protector(reg, block_artifacts) 2634 state.inspect.add_breakpoint('reg_write', 2635 BP( 2636 BP_AFTER, 2637 reg_write_offset=state.arch.registers[reg][0], 2638 action=reg_protector.write_persistent_register 2639 ) 2640 ) 2641 simgr = self.project.factory.simulation_manager(state) 2642 simgr.use_technique(LoopSeer(bound=10)) 2643 simgr.use_technique(Explorer(find=current_block.addr, avoid=avoid)) 2644 simgr.use_technique(LengthLimiter(path_length)) 2645 simgr.run() 2646 2647 if simgr.found: 2648 simgr = self.project.factory.simulation_manager(simgr.one_found, save_unsat=True) 2649 simgr.step() 2650 if simgr.active or simgr.unsat: 2651 keep_running = False 2652 concrete_exits.extend(simgr.active) 2653 concrete_exits.extend(simgr.unsat) 2654 if keep_running: 2655 l.debug('Step back for one more run...') 2656 2657 # Make sure these successors are actually concrete 2658 # We just use the ip, persistent registers, and jumpkind to initialize the original unsat state 2659 # TODO: It works for jumptables, but not for calls. We should also handle changes in sp 2660 new_concrete_successors = [] 2661 for c in concrete_exits: 2662 unsat_state = current_block.unsat_successors[0].copy() 2663 unsat_state.history.jumpkind = c.history.jumpkind 2664 for reg in unsat_state.arch.persistent_regs + ['ip']: 2665 unsat_state.registers.store(reg, c.registers.load(reg)) 2666 new_concrete_successors.append(unsat_state) 2667 2668 return new_concrete_successors 2669 2670 def _get_symbolic_function_initial_state(self, function_addr, fastpath_mode_state=None): 2671 """ 2672 Symbolically execute the first basic block of the specified function, 2673 then returns it. We prepares the state using the already existing 2674 state in fastpath mode (if avaiable). 2675 :param function_addr: The function address 2676 :return: A symbolic state if succeeded, None otherwise 2677 """ 2678 if function_addr is None: 2679 return None 2680 2681 if function_addr in self._symbolic_function_initial_state: 2682 return self._symbolic_function_initial_state[function_addr] 2683 2684 if fastpath_mode_state is not None: 2685 fastpath_state = fastpath_mode_state 2686 else: 2687 if function_addr in self._function_input_states: 2688 fastpath_state = self._function_input_states[function_addr] 2689 else: 2690 raise AngrCFGError('The impossible happened. Please report to Fish.') 2691 2692 symbolic_initial_state = self.project.factory.entry_state(mode='symbolic') 2693 if fastpath_state is not None: 2694 symbolic_initial_state = self.project.simos.prepare_call_state(fastpath_state, 2695 initial_state=symbolic_initial_state) 2696 2697 # Find number of instructions of start block 2698 func = self.project.kb.functions.get(function_addr) 2699 start_block = func._get_block(function_addr) 2700 num_instr = start_block.instructions - 1 2701 2702 symbolic_initial_state.ip = function_addr 2703 path = self.project.factory.path(symbolic_initial_state) 2704 try: 2705 sim_successors = self.project.factory.successors(path.state, num_inst=num_instr) 2706 except (SimError, AngrError): 2707 return None 2708 2709 # We execute all but the last instruction in this basic block, so we have a cleaner 2710 # state 2711 # Start execution! 2712 exits = sim_successors.flat_successors + sim_successors.unsat_successors 2713 2714 if exits: 2715 final_st = None 2716 for ex in exits: 2717 if ex.satisfiable(): 2718 final_st = ex 2719 break 2720 else: 2721 final_st = None 2722 2723 self._symbolic_function_initial_state[function_addr] = final_st 2724 2725 return final_st 2726 2727 # Private methods - function hints 2728 2729 def _search_for_function_hints(self, successor_state): 2730 """ 2731 Scan for constants that might be used as exit targets later, and add them into pending_exits. 2732 2733 :param SimState successor_state: A successing state. 2734 :return: A list of discovered code addresses. 2735 :rtype: list 2736 """ 2737 2738 function_hints = [] 2739 2740 for action in successor_state.history.recent_actions: 2741 if action.type == 'reg' and action.offset == self.project.arch.ip_offset: 2742 # Skip all accesses to IP registers 2743 continue 2744 elif action.type == 'exit': 2745 # only consider read/write actions 2746 continue 2747 2748 # Enumerate actions 2749 if isinstance(action, SimActionData): 2750 data = action.data 2751 if data is not None: 2752 # TODO: Check if there is a proper way to tell whether this const falls in the range of code 2753 # TODO: segments 2754 # Now let's live with this big hack... 2755 try: 2756 const = successor_state.solver.eval_one(data.ast) 2757 except: # pylint: disable=bare-except 2758 continue 2759 2760 if self._is_address_executable(const): 2761 if self._pending_function_hints is not None and const in self._pending_function_hints: 2762 continue 2763 2764 # target = const 2765 # tpl = (None, None, target) 2766 # st = self.project._simos.prepare_call_state(self.project.initial_state(mode='fastpath'), 2767 # initial_state=saved_state) 2768 # st = self.project.initial_state(mode='fastpath') 2769 # exits[tpl] = (st, None, None) 2770 2771 function_hints.append(const) 2772 2773 l.debug('Got %d possible exits, including: %s', len(function_hints), 2774 ", ".join(["0x%x" % f for f in function_hints]) 2775 ) 2776 2777 return function_hints 2778 2779 # Private methods - creation of stuff (SimSuccessors, CFGNode, call-stack, etc.) 2780 2781 def _get_simsuccessors(self, addr, job, current_function_addr=None): 2782 """ 2783 Create the SimSuccessors instance for a block. 2784 2785 :param int addr: Address of the block. 2786 :param CFGJob job: The CFG job instance with an input state inside. 2787 :param int current_function_addr: Address of the current function. 2788 :return: A SimSuccessors instance 2789 :rtype: SimSuccessors 2790 """ 2791 2792 exception_info = None 2793 state = job.state 2794 saved_state = job.state # We don't have to make a copy here 2795 2796 # respect the basic block size from base graph 2797 block_size = None 2798 if self._base_graph is not None: 2799 for n in self._base_graph.nodes(): 2800 if n.addr == addr: 2801 block_size = n.size 2802 break 2803 2804 try: 2805 sim_successors = None 2806 2807 if not self._keep_state: 2808 if self.project.is_hooked(addr): 2809 old_proc = self.project._sim_procedures[addr] 2810 is_continuation = old_proc.is_continuation 2811 elif self.project.simos.is_syscall_addr(addr): 2812 old_proc = self.project.simos.syscall_from_addr(addr) 2813 is_continuation = False # syscalls don't support continuation 2814 else: 2815 old_proc = None 2816 is_continuation = None 2817 2818 if old_proc is not None and \ 2819 not is_continuation and \ 2820 not old_proc.ADDS_EXITS and \ 2821 not old_proc.NO_RET: 2822 # DON'T CREATE USELESS SIMPROCEDURES if we don't care about the accuracy of states 2823 # When generating CFG, a SimProcedure will not be created as it is but be created as a 2824 # ReturnUnconstrained stub if it satisfies the following conditions: 2825 # - It doesn't add any new exits. 2826 # - It returns as normal. 2827 # In this way, we can speed up the CFG generation by quite a lot as we avoid simulating 2828 # those functions like read() and puts(), which has no impact on the overall control flow at all. 2829 # 2830 # Special notes about SimProcedure continuation: Any SimProcedure instance that is a continuation 2831 # will add new exits, otherwise the original SimProcedure wouldn't have been executed anyway. Hence 2832 # it's reasonable for us to always simulate a SimProcedure with continuation. 2833 2834 old_name = None 2835 2836 if old_proc.is_syscall: 2837 new_stub = SIM_PROCEDURES["stubs"]["syscall"] 2838 ret_to = state.regs.ip_at_syscall 2839 else: 2840 # normal SimProcedures 2841 new_stub = SIM_PROCEDURES["stubs"]["ReturnUnconstrained"] 2842 ret_to = None 2843 2844 old_name = old_proc.display_name 2845 2846 # instantiate the stub 2847 new_stub_inst = new_stub(display_name=old_name) 2848 2849 sim_successors = self.project.factory.procedure_engine.process( 2850 state, 2851 procedure=new_stub_inst, 2852 force_addr=addr, 2853 ret_to=ret_to, 2854 ) 2855 2856 if sim_successors is None: 2857 jumpkind = state.history.jumpkind 2858 jumpkind = 'Ijk_Boring' if jumpkind is None else jumpkind 2859 sim_successors = self.project.factory.successors( 2860 state, 2861 jumpkind=jumpkind, 2862 size=block_size, 2863 opt_level=self._iropt_level) 2864 2865 except (SimFastPathError, SimSolverModeError) as ex: 2866 2867 if saved_state.mode == 'fastpath': 2868 # Got a SimFastPathError or SimSolverModeError in FastPath mode. 2869 # We wanna switch to symbolic mode for current IRSB. 2870 l.debug('Switch to symbolic mode for address %#x', addr) 2871 # Make a copy of the current 'fastpath' state 2872 2873 l.debug('Symbolic jumps at basic block %#x.', addr) 2874 2875 new_state = None 2876 if addr != current_function_addr: 2877 new_state = self._get_symbolic_function_initial_state(current_function_addr) 2878 2879 if new_state is None: 2880 new_state = state.copy() 2881 new_state.set_mode('symbolic') 2882 new_state.options.add(o.DO_RET_EMULATION) 2883 # Remove bad constraints 2884 # FIXME: This is so hackish... 2885 new_state.solver._solver.constraints = [c for c in new_state.solver.constraints if 2886 c.op != 'BoolV' or c.args[0] is not False] 2887 new_state.solver._solver._result = None 2888 # Swap them 2889 saved_state, job.state = job.state, new_state 2890 sim_successors, exception_info, _ = self._get_simsuccessors(addr, job) 2891 2892 else: 2893 exception_info = sys.exc_info() 2894 # Got a SimSolverModeError in symbolic mode. We are screwed. 2895 # Skip this IRSB 2896 l.debug("Caught a SimIRSBError %s. Don't panic, this is usually expected.", ex) 2897 inst = SIM_PROCEDURES["stubs"]["PathTerminator"]() 2898 sim_successors = ProcedureEngine().process(state, procedure=inst) 2899 2900 except SimIRSBError: 2901 exception_info = sys.exc_info() 2902 # It's a tragedy that we came across some instructions that VEX 2903 # does not support. I'll create a terminating stub there 2904 l.debug("Caught a SimIRSBError during CFG recovery. Creating a PathTerminator.", exc_info=True) 2905 inst = SIM_PROCEDURES["stubs"]["PathTerminator"]() 2906 sim_successors = ProcedureEngine().process(state, procedure=inst) 2907 2908 except claripy.ClaripyError: 2909 exception_info = sys.exc_info() 2910 l.debug("Caught a ClaripyError during CFG recovery. Don't panic, this is usually expected.", exc_info=True) 2911 # Generate a PathTerminator to terminate the current path 2912 inst = SIM_PROCEDURES["stubs"]["PathTerminator"]() 2913 sim_successors = ProcedureEngine().process(state, procedure=inst) 2914 2915 except SimError: 2916 exception_info = sys.exc_info() 2917 l.debug("Caught a SimError during CFG recovery. Don't panic, this is usually expected.", exc_info=True) 2918 # Generate a PathTerminator to terminate the current path 2919 inst = SIM_PROCEDURES["stubs"]["PathTerminator"]() 2920 sim_successors = ProcedureEngine().process(state, procedure=inst) 2921 2922 except AngrExitError as ex: 2923 exception_info = sys.exc_info() 2924 l.debug("Caught a AngrExitError during CFG recovery. Don't panic, this is usually expected.", exc_info=True) 2925 # Generate a PathTerminator to terminate the current path 2926 inst = SIM_PROCEDURES["stubs"]["PathTerminator"]() 2927 sim_successors = ProcedureEngine().process(state, procedure=inst) 2928 2929 except AngrError: 2930 exception_info = sys.exc_info() 2931 section = self.project.loader.main_object.find_section_containing(addr) 2932 if section is None: 2933 sec_name = 'No section' 2934 else: 2935 sec_name = section.name 2936 # AngrError shouldn't really happen though 2937 l.debug("Caught an AngrError during CFG recovery at %#x (%s)", 2938 addr, sec_name, exc_info=True) 2939 # We might be on a wrong branch, and is likely to encounter the 2940 # "No bytes in memory xxx" exception 2941 # Just ignore it 2942 sim_successors = None 2943 2944 return sim_successors, exception_info, saved_state 2945 2946 def _create_new_call_stack(self, addr, all_jobs, job, exit_target, jumpkind): 2947 """ 2948 Creates a new call stack, and according to the jumpkind performs appropriate actions. 2949 2950 :param int addr: Address to create at. 2951 :param Simsuccessors all_jobs: Jobs to get stack pointer from or return address. 2952 :param CFGJob job: CFGJob to copy current call stack from. 2953 :param int exit_target: Address of exit target. 2954 :param str jumpkind: The type of jump performed. 2955 :returns: New call stack for target block. 2956 :rtype: CallStack 2957 """ 2958 2959 if self._is_call_jumpkind(jumpkind): 2960 new_call_stack = job.call_stack_copy() 2961 # Notice that in ARM, there are some freaking instructions 2962 # like 2963 # BLEQ <address> 2964 # It should give us three exits: Ijk_Call, Ijk_Boring, and 2965 # Ijk_Ret. The last exit is simulated. 2966 # Notice: We assume the last exit is the simulated one 2967 if len(all_jobs) > 1 and all_jobs[-1].history.jumpkind == "Ijk_FakeRet": 2968 se = all_jobs[-1].solver 2969 retn_target_addr = se.eval_one(all_jobs[-1].ip, default=0) 2970 sp = se.eval_one(all_jobs[-1].regs.sp, default=0) 2971 2972 new_call_stack = new_call_stack.call(addr, exit_target, 2973 retn_target=retn_target_addr, 2974 stack_pointer=sp) 2975 2976 elif jumpkind.startswith('Ijk_Sys') and len(all_jobs) == 1: 2977 # This is a syscall. It returns to the same address as itself (with a different jumpkind) 2978 retn_target_addr = exit_target 2979 se = all_jobs[0].solver 2980 sp = se.eval_one(all_jobs[0].regs.sp, default=0) 2981 new_call_stack = new_call_stack.call(addr, exit_target, 2982 retn_target=retn_target_addr, 2983 stack_pointer=sp) 2984 2985 else: 2986 # We don't have a fake return exit available, which means 2987 # this call doesn't return. 2988 new_call_stack = CallStack() 2989 se = all_jobs[-1].solver 2990 sp = se.eval_one(all_jobs[-1].regs.sp, default=0) 2991 2992 new_call_stack = new_call_stack.call(addr, exit_target, retn_target=None, stack_pointer=sp) 2993 2994 elif jumpkind == "Ijk_Ret": 2995 # Normal return 2996 new_call_stack = job.call_stack_copy() 2997 try: 2998 new_call_stack = new_call_stack.ret(exit_target) 2999 except SimEmptyCallStackError: 3000 pass 3001 3002 se = all_jobs[-1].solver 3003 sp = se.eval_one(all_jobs[-1].regs.sp, default=0) 3004 old_sp = job.current_stack_pointer 3005 3006 # Calculate the delta of stack pointer 3007 if sp is not None and old_sp is not None: 3008 delta = sp - old_sp 3009 func_addr = job.func_addr 3010 3011 if self.kb.functions.function(func_addr) is None: 3012 # Create the function if it doesn't exist 3013 # FIXME: But hell, why doesn't it exist in the first place? 3014 l.error("Function 0x%x doesn't exist in function manager although it should be there." 3015 "Look into this issue later.", 3016 func_addr) 3017 self.kb.functions.function(func_addr, create=True) 3018 3019 # Set sp_delta of the function 3020 self.kb.functions.function(func_addr, create=True).sp_delta = delta 3021 3022 elif jumpkind == 'Ijk_FakeRet': 3023 # The fake return... 3024 new_call_stack = job.call_stack 3025 3026 else: 3027 # although the jumpkind is not Ijk_Call, it may still jump to a new function... let's see 3028 if self.project.is_hooked(exit_target): 3029 hooker = self.project.hooked_by(exit_target) 3030 if not hooker is procedures.stubs.UserHook.UserHook: 3031 # if it's not a UserHook, it must be a function 3032 # Update the function address of the most recent call stack frame 3033 new_call_stack = job.call_stack_copy() 3034 new_call_stack.current_function_address = exit_target 3035 3036 else: 3037 # TODO: We need a way to mark if a user hook is a function or not 3038 # TODO: We can add a map in Project to store this information 3039 # For now we are just assuming they are not functions, which is mostly the case 3040 new_call_stack = job.call_stack 3041 3042 else: 3043 # Normal control flow transition 3044 new_call_stack = job.call_stack 3045 3046 return new_call_stack 3047 3048 def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None, depth=None, exception_info=None): 3049 """ 3050 Create a context-sensitive CFGNode instance for a specific block. 3051 3052 :param SimSuccessors sim_successors: The SimSuccessors object. 3053 :param CallStack call_stack_suffix: The call stack. 3054 :param int func_addr: Address of the current function. 3055 :param BlockID block_id: The block ID of this CFGNode. 3056 :param int or None depth: Depth of this CFGNode. 3057 :return: A CFGNode instance. 3058 :rtype: CFGNode 3059 """ 3060 3061 sa = sim_successors.artifacts # shorthand 3062 3063 # Determine if this is a SimProcedure, and further, if this is a syscall 3064 syscall = None 3065 is_syscall = False 3066 if sim_successors.sort == 'SimProcedure': 3067 is_simprocedure = True 3068 if sa['is_syscall'] is True: 3069 is_syscall = True 3070 syscall = sim_successors.artifacts['procedure'] 3071 else: 3072 is_simprocedure = False 3073 3074 if is_simprocedure: 3075 simproc_name = sa['name'].split('.')[-1] 3076 if simproc_name == "ReturnUnconstrained" and 'resolves' in sa and sa['resolves'] is not None: 3077 simproc_name = sa['resolves'] 3078 3079 no_ret = False 3080 if syscall is not None and sa['no_ret']: 3081 no_ret = True 3082 3083 cfg_node = CFGENode(sim_successors.addr, 3084 None, 3085 self.model, 3086 callstack_key=call_stack.stack_suffix(self.context_sensitivity_level), 3087 input_state=None, 3088 simprocedure_name=simproc_name, 3089 syscall_name=syscall, 3090 no_ret=no_ret, 3091 is_syscall=is_syscall, 3092 function_address=sim_successors.addr, 3093 block_id=block_id, 3094 depth=depth, 3095 creation_failure_info=exception_info, 3096 thumb=(isinstance(self.project.arch, ArchARM) and sim_successors.addr & 1), 3097 ) 3098 3099 else: 3100 cfg_node = CFGENode(sim_successors.addr, 3101 sa['irsb_size'], 3102 self.model, 3103 callstack_key=call_stack.stack_suffix(self.context_sensitivity_level), 3104 input_state=None, 3105 is_syscall=is_syscall, 3106 function_address=func_addr, 3107 block_id=block_id, 3108 depth=depth, 3109 irsb=sim_successors.artifacts['irsb'], 3110 creation_failure_info=exception_info, 3111 thumb=(isinstance(self.project.arch, ArchARM) and sim_successors.addr & 1), 3112 ) 3113 3114 return cfg_node 3115 3116 # Private methods - loops and graph normalization 3117 3118 def _detect_loops(self, loop_callback=None): 3119 """ 3120 Loop detection. 3121 3122 :param func loop_callback: A callback function for each detected loop backedge. 3123 :return: None 3124 """ 3125 3126 loop_finder = self.project.analyses.LoopFinder(kb=self.kb, normalize=False, fail_fast=self._fail_fast) 3127 3128 if loop_callback is not None: 3129 graph_copy = networkx.DiGraph(self._graph) 3130 3131 for loop in loop_finder.loops: # type: angr.analyses.loopfinder.Loop 3132 loop_callback(graph_copy, loop) 3133 3134 self.model.graph = graph_copy 3135 3136 # Update loop backedges and graph 3137 self._loop_back_edges = list(itertools.chain.from_iterable(loop.continue_edges for loop in loop_finder.loops)) 3138 3139 # Private methods - function/procedure/subroutine analysis 3140 # Including calling convention, function arguments, etc. 3141 3142 def _refine_function_arguments(self, func, callsites): 3143 """ 3144 3145 :param func: 3146 :param callsites: 3147 :return: 3148 """ 3149 3150 for i, c in enumerate(callsites): 3151 # Execute one block ahead of the callsite, and execute one basic block after the callsite 3152 # In this process, the following tasks are performed: 3153 # - Record registers/stack variables that are modified 3154 # - Record the change of the stack pointer 3155 # - Check if the return value is used immediately 3156 # We assume that the stack is balanced before and after the call (you can have caller clean-up of course). 3157 # Any abnormal behaviors will be logged. 3158 # Hopefully this approach will allow us to have a better understanding of parameters of those function 3159 # stubs and function proxies. 3160 3161 if c.simprocedure_name is not None: 3162 # Skip all SimProcedures 3163 continue 3164 3165 l.debug("Refining %s at 0x%x (%d/%d).", repr(func), c.addr, i, len(callsites)) 3166 3167 # Get a basic block ahead of the callsite 3168 blocks_ahead = [c] 3169 3170 # the block after 3171 blocks_after = [] 3172 successors = self.get_successors_and_jumpkind(c, excluding_fakeret=False) 3173 for s, jk in successors: 3174 if jk == 'Ijk_FakeRet': 3175 blocks_after = [s] 3176 break 3177 3178 regs_overwritten = set() 3179 stack_overwritten = set() 3180 regs_read = set() 3181 regs_written = set() 3182 3183 try: 3184 # Execute the predecessor 3185 path = self.project.factory.path( 3186 self.project.factory.blank_state(mode="fastpath", addr=blocks_ahead[0].addr)) 3187 path.step() 3188 all_successors = path.next_run.successors + path.next_run.unsat_successors 3189 if not all_successors: 3190 continue 3191 3192 suc = all_successors[0] 3193 se = suc.solver 3194 # Examine the path log 3195 actions = suc.history.recent_actions 3196 sp = se.eval_one(suc.regs.sp, default=0) + self.project.arch.call_sp_fix 3197 for ac in actions: 3198 if ac.type == "reg" and ac.action == "write": 3199 regs_overwritten.add(ac.offset) 3200 elif ac.type == "mem" and ac.action == "write": 3201 addr = se.eval_one(ac.addr.ast, default=0) 3202 if (self.project.arch.call_pushes_ret and addr >= sp + self.project.arch.bytes) or \ 3203 (not self.project.arch.call_pushes_ret and addr >= sp): 3204 offset = addr - sp 3205 stack_overwritten.add(offset) 3206 3207 func.prepared_registers.add(tuple(regs_overwritten)) 3208 func.prepared_stack_variables.add(tuple(stack_overwritten)) 3209 3210 except (SimError, AngrError): 3211 pass 3212 3213 try: 3214 if blocks_after: 3215 path = self.project.factory.path( 3216 self.project.factory.blank_state(mode="fastpath", addr=blocks_after[0].addr)) 3217 path.step() 3218 all_successors = path.next_run.successors + path.next_run.unsat_successors 3219 if not all_successors: 3220 continue 3221 3222 suc = all_successors[0] 3223 actions = suc.history.recent_actions 3224 for ac in actions: 3225 if ac.type == "reg" and ac.action == "read" and ac.offset not in regs_written: 3226 regs_read.add(ac.offset) 3227 elif ac.type == "reg" and ac.action == "write": 3228 regs_written.add(ac.offset) 3229 3230 # Filter registers, remove unnecessary registers from the set 3231 # regs_overwritten = self.project.arch.argument_registers.intersection(regs_overwritten) 3232 regs_read = self.project.arch.argument_registers.intersection(regs_read) 3233 3234 func.registers_read_afterwards.add(tuple(regs_read)) 3235 3236 except (SimError, AngrError): 3237 pass 3238 3239 # Private methods - dominators and post-dominators 3240 3241 def _immediate_dominators(self, node, target_graph=None, reverse_graph=False): 3242 """ 3243 Get all immediate dominators of sub graph from given node upwards. 3244 3245 :param str node: id of the node to navigate forwards from. 3246 :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph. 3247 :param bool reverse_graph: Whether the target graph should be reversed before analysation. 3248 3249 :return: each node of graph as index values, with element as respective node's immediate dominator. 3250 :rtype: dict 3251 """ 3252 if target_graph is None: 3253 target_graph = self.graph 3254 3255 if node not in target_graph: 3256 raise AngrCFGError('Target node %s is not in graph.' % node) 3257 3258 graph = networkx.DiGraph(target_graph) 3259 if reverse_graph: 3260 # Reverse the graph without deepcopy 3261 for n in target_graph.nodes(): 3262 graph.add_node(n) 3263 for src, dst in target_graph.edges(): 3264 graph.add_edge(dst, src) 3265 3266 idom = {node: node} 3267 3268 order = list(networkx.dfs_postorder_nodes(graph, node)) 3269 dfn = {u: i for i, u in enumerate(order)} 3270 order.pop() 3271 order.reverse() 3272 3273 def intersect(u_, v_): 3274 """ 3275 Finds the highest (in postorder valuing) point of intersection above two node arguments. 3276 3277 :param str u_: nx node id. 3278 :param str v_: nx node id. 3279 :return: intersection of paths. 3280 :rtype: str 3281 """ 3282 while u_ != v_: 3283 while dfn[u_] < dfn[v_]: 3284 u_ = idom[u_] 3285 while dfn[u_] > dfn[v_]: 3286 v_ = idom[v_] 3287 return u_ 3288 3289 changed = True 3290 while changed: 3291 changed = False 3292 for u in order: 3293 new_idom = reduce(intersect, (v for v in graph.pred[u] if v in idom)) 3294 if u not in idom or idom[u] != new_idom: 3295 idom[u] = new_idom 3296 changed = True 3297 3298 return idom 3299 3300 # 3301 # Static private utility methods 3302 # 3303 3304 @staticmethod 3305 def _is_indirect_jump(_, sim_successors): 3306 """ 3307 Determine if this SimIRSB has an indirect jump as its exit 3308 """ 3309 3310 if sim_successors.artifacts['irsb_direct_next']: 3311 # It's a direct jump 3312 return False 3313 3314 default_jumpkind = sim_successors.artifacts['irsb_default_jumpkind'] 3315 if default_jumpkind not in ('Ijk_Call', 'Ijk_Boring', 'Ijk_InvalICache'): 3316 # It's something else, like a ret of a syscall... we don't care about it 3317 return False 3318 3319 return True 3320 3321 @staticmethod 3322 def _generate_block_id(call_stack_suffix, block_addr, is_syscall): 3323 if not is_syscall: 3324 return BlockID.new(block_addr, call_stack_suffix, 'normal') 3325 return BlockID.new(block_addr, call_stack_suffix, 'syscall') 3326 3327 @staticmethod 3328 def _block_id_repr(block_id): 3329 return repr(block_id) 3330 3331 @staticmethod 3332 def _block_id_callstack_key(block_id): 3333 return block_id.callsite_tuples 3334 3335 @staticmethod 3336 def _block_id_addr(block_id): 3337 return block_id.addr 3338 3339 @staticmethod 3340 def _block_id_current_func_addr(block_id): 3341 """ 3342 If we don't have any information about the caller, we have no way to get the address of the current function. 3343 3344 :param BlockID block_id: The block ID. 3345 :return: The function address if there is one, or None if it's not possible to get. 3346 :rtype: int or None 3347 """ 3348 3349 if block_id.callsite_tuples: 3350 return block_id.callsite_tuples[-1] 3351 return None 3352 3353 # 3354 # Other private utility methods 3355 # 3356 3357 @staticmethod 3358 def _is_call_jumpkind(jumpkind): 3359 if jumpkind == 'Ijk_Call' or jumpkind.startswith('Ijk_Sys_'): 3360 return True 3361 return False 3362 3363 def _push_unresolvable_run(self, block_address): 3364 """ 3365 Adds this block to the list of unresolvable runs. 3366 3367 :param dict block_address: container of IRSB data from run. 3368 """ 3369 self._unresolvable_runs.add(block_address) 3370 3371 def _is_address_executable(self, address): 3372 """ 3373 Check if the specific address is in one of the executable ranges. 3374 3375 :param int address: The address 3376 :return: True if it's in an executable range, False otherwise 3377 """ 3378 3379 for r in self._executable_address_ranges: 3380 if r[0] <= address < r[1]: 3381 return True 3382 return False 3383 3384 def _update_thumb_addrs(self, simsuccessors, state): 3385 """ 3386 3387 :return: 3388 """ 3389 3390 # For ARM THUMB mode 3391 if simsuccessors.sort == 'IRSB' and state.thumb: 3392 insn_addrs = simsuccessors.artifacts['insn_addrs'] 3393 self._thumb_addrs.update(insn_addrs) 3394 self._thumb_addrs.update(map(lambda x: x + 1, insn_addrs)) 3395 3396 def _get_callsites(self, function_address): 3397 """ 3398 Get where a specific function is called. 3399 :param function_address: Address of the target function 3400 :return: A list of CFGNodes whose exits include a call/jump to the given function 3401 """ 3402 3403 all_predecessors = [] 3404 3405 nodes = self.get_all_nodes(function_address) 3406 for n in nodes: 3407 predecessors = list(self.get_predecessors(n)) 3408 all_predecessors.extend(predecessors) 3409 3410 return all_predecessors 3411 3412 def _get_nx_paths(self, begin, end): 3413 """ 3414 Get the possible (networkx) simple paths between two nodes or addresses 3415 corresponding to nodes. 3416 Input: addresses or node instances 3417 Return: a list of lists of nodes representing paths. 3418 """ 3419 if isinstance(begin, int) and isinstance(end, int): 3420 n_begin = self.get_any_node(begin) 3421 n_end = self.get_any_node(end) 3422 3423 elif isinstance(begin, CFGENode) and isinstance(end, CFGENode): 3424 n_begin = begin 3425 n_end = end 3426 else: 3427 raise AngrCFGError("from and to should be of the same type") 3428 3429 self.remove_fakerets() 3430 return networkx.all_shortest_paths(self.graph, n_begin, n_end) 3431 3432 def _quasi_topological_sort(self): 3433 """ 3434 Perform a quasi-topological sort on an already constructed CFG graph (a networkx DiGraph) 3435 3436 :return: None 3437 """ 3438 3439 # Clear the existing sorting result 3440 self._quasi_topological_order = {} 3441 3442 ctr = self._graph.number_of_nodes() 3443 3444 for ep in self._entry_points: 3445 # FIXME: This is not always correct. We'd better store CFGNodes in self._entry_points 3446 ep_node = self.get_any_node(ep) 3447 3448 if not ep_node: 3449 continue 3450 3451 for n in networkx.dfs_postorder_nodes(self._graph, source=ep_node): 3452 if n not in self._quasi_topological_order: 3453 self._quasi_topological_order[n] = ctr 3454 ctr -= 1 3455 3456 def _reset_state_mode(self, state, mode): 3457 """ 3458 Reset the state mode to the given mode, and apply the custom state options specified with this analysis. 3459 3460 :param state: The state to work with. 3461 :param str mode: The state mode. 3462 :return: None 3463 """ 3464 3465 state.set_mode(mode) 3466 state.options |= self._state_add_options 3467 state.options = state.options.difference(self._state_remove_options) 3468 3469from angr.analyses import AnalysesHub 3470AnalysesHub.register_default('CFGEmulated', CFGEmulated) 3471