1import itertools
2import logging
3import sys
4from collections import defaultdict
5from functools import reduce
6from typing import Dict
7
8import claripy
9import networkx
10import pyvex
11from archinfo import ArchARM
12
13
14from ... import BP, BP_BEFORE, BP_AFTER, SIM_PROCEDURES, procedures
15from ... import options as o
16from ...engines.procedure import ProcedureEngine
17from ...exploration_techniques.loop_seer import LoopSeer
18from ...exploration_techniques.slicecutor import Slicecutor
19from ...exploration_techniques.explorer import Explorer
20from ...exploration_techniques.lengthlimiter import LengthLimiter
21from ...errors import AngrCFGError, AngrError, AngrSkipJobNotice, SimError, SimValueError, SimSolverModeError, \
22    SimFastPathError, SimIRSBError, AngrExitError, SimEmptyCallStackError
23from ...sim_state import SimState
24from ...state_plugins.callstack import CallStack
25from ...state_plugins.sim_action import SimActionData
26from ...knowledge_plugins.cfg import CFGENode, IndirectJump
27from ...utils.constants import DEFAULT_STATEMENT
28from ..forward_analysis import ForwardAnalysis
29from .cfg_base import CFGBase
30from .cfg_job_base import BlockID, CFGJobBase
31from .cfg_utils import CFGUtils
32
33l = logging.getLogger(name=__name__)
34
35
36class CFGJob(CFGJobBase):
37    def __init__(self, *args, **kwargs):
38        super(CFGJob, self).__init__(*args, **kwargs)
39
40        #
41        # local variables used during analysis
42        #
43
44        if self.jumpkind is None:
45            # load jumpkind from path.state.scratch
46            self.jumpkind = 'Ijk_Boring' if self.state.history.jumpkind is None else \
47                self.state.history.jumpkind
48
49        self.call_stack_suffix = None
50        self.current_function = None
51
52        self.cfg_node = None
53        self.sim_successors = None
54        self.exception_info = None
55        self.successor_status = None
56
57        self.extra_info = None
58
59    @property
60    def block_id(self):
61        if self._block_id is None:
62            # generate a new block ID
63            self._block_id = CFGEmulated._generate_block_id(
64                self.call_stack.stack_suffix(self._context_sensitivity_level), self.addr, self.is_syscall)
65        return self._block_id
66
67    @property
68    def is_syscall(self):
69        return self.jumpkind is not None and self.jumpkind.startswith('Ijk_Sys')
70
71    def __hash__(self):
72        return hash(self.block_id)
73
74
75class PendingJob:
76    def __init__(self, caller_func_addr, returning_source, state, src_block_id, src_exit_stmt_idx, src_exit_ins_addr,
77                 call_stack):
78        """
79        A PendingJob is whatever will be put into our pending_exit list. A pending exit is an entry that created by the
80        returning of a call or syscall. It is "pending" since we cannot immediately figure out whether this entry will
81        be executed or not. If the corresponding call/syscall intentially doesn't return, then the pending exit will be
82        removed. If the corresponding call/syscall returns, then the pending exit will be removed as well (since a real
83        entry is created from the returning and will be analyzed later). If the corresponding call/syscall might
84        return, but for some reason (for example, an unsupported instruction is met during the analysis) our analysis
85        does not return properly, then the pending exit will be picked up and put into remaining_jobs list.
86
87        :param returning_source:    Address of the callee function. It might be None if address of the callee is not
88                                    resolvable.
89        :param state:               The state after returning from the callee function. Of course there is no way to get
90                                    a precise state without emulating the execution of the callee, but at least we can
91                                    properly adjust the stack and registers to imitate the real returned state.
92        :param call_stack:          A callstack.
93        """
94
95        self.caller_func_addr = caller_func_addr
96        self.returning_source = returning_source
97        self.state = state
98        self.src_block_id = src_block_id
99        self.src_exit_stmt_idx = src_exit_stmt_idx
100        self.src_exit_ins_addr = src_exit_ins_addr
101        self.call_stack = call_stack
102
103    def __repr__(self):
104        return "<PendingJob to %s, from function %s>" % (self.state.ip, hex(
105            self.returning_source) if self.returning_source is not None else 'Unknown')
106
107    def __hash__(self):
108        return hash((self.caller_func_addr, self.returning_source, self.src_block_id, self.src_exit_stmt_idx,
109                     self.src_exit_ins_addr
110                     )
111                    )
112
113    def __eq__(self, other):
114        if not isinstance(other, PendingJob):
115            return False
116        return self.caller_func_addr == other.caller_func_addr and \
117               self.returning_source == other.returning_source and \
118               self.src_block_id == other.src_block_id and \
119               self.src_exit_stmt_idx == other.src_exit_stmt_idx and \
120               self.src_exit_ins_addr == other.src_exit_ins_addr
121
122
123class CFGEmulated(ForwardAnalysis, CFGBase):    # pylint: disable=abstract-method
124    """
125    This class represents a control-flow graph.
126    """
127
128    tag = "CFGEmulated"
129
130    def __init__(self,
131                 context_sensitivity_level=1,
132                 start=None,
133                 avoid_runs=None,
134                 enable_function_hints=False,
135                 call_depth=None,
136                 call_tracing_filter=None,
137                 initial_state=None,
138                 starts=None,
139                 keep_state=False,
140                 indirect_jump_target_limit=100000,
141                 resolve_indirect_jumps=True,
142                 enable_advanced_backward_slicing=False,
143                 enable_symbolic_back_traversal=False,
144                 indirect_jump_resolvers=None,
145                 additional_edges=None,
146                 no_construct=False,
147                 normalize=False,
148                 max_iterations=1,
149                 address_whitelist=None,
150                 base_graph=None,
151                 iropt_level=None,
152                 max_steps=None,
153                 state_add_options=None,
154                 state_remove_options=None,
155                 model=None,
156                 ):
157        """
158        All parameters are optional.
159
160        :param context_sensitivity_level:           The level of context-sensitivity of this CFG (see documentation for
161                                                    further details). It ranges from 0 to infinity. Default 1.
162        :param avoid_runs:                          A list of runs to avoid.
163        :param enable_function_hints:               Whether to use function hints (constants that might be used as exit
164                                                    targets) or not.
165        :param call_depth:                          How deep in the call stack to trace.
166        :param call_tracing_filter:                 Filter to apply on a given path and jumpkind to determine if it
167                                                    should be skipped when call_depth is reached.
168        :param initial_state:                       An initial state to use to begin analysis.
169        :param iterable starts:                     A collection of starting points to begin analysis. It can contain
170                                                    the following three different types of entries: an address specified
171                                                    as an integer, a 2-tuple that includes an integer address and a
172                                                    jumpkind, or a SimState instance. Unsupported entries in starts will
173                                                    lead to an AngrCFGError being raised.
174        :param keep_state:                          Whether to keep the SimStates for each CFGNode.
175        :param resolve_indirect_jumps:              Whether to enable the indirect jump resolvers for resolving indirect jumps
176        :param enable_advanced_backward_slicing:    Whether to enable an intensive technique for resolving indirect jumps
177        :param enable_symbolic_back_traversal:      Whether to enable an intensive technique for resolving indirect jumps
178        :param list indirect_jump_resolvers:        A custom list of indirect jump resolvers. If this list is None or empty,
179                                                    default indirect jump resolvers specific to this architecture and binary
180                                                    types will be loaded.
181        :param additional_edges:                    A dict mapping addresses of basic blocks to addresses of
182                                                    successors to manually include and analyze forward from.
183        :param bool no_construct:                   Skip the construction procedure. Only used in unit-testing.
184        :param bool normalize:                      If the CFG as well as all Function graphs should be normalized or
185                                                    not.
186        :param int max_iterations:                  The maximum number of iterations that each basic block should be
187                                                    "executed". 1 by default. Larger numbers of iterations are usually
188                                                    required for complex analyses like loop analysis.
189        :param iterable address_whitelist:          A list of allowed addresses. Any basic blocks outside of this
190                                                    collection of addresses will be ignored.
191        :param networkx.DiGraph base_graph:         A basic control flow graph to follow. Each node inside this graph
192                                                    must have the following properties: `addr` and `size`. CFG recovery
193                                                    will strictly follow nodes and edges shown in the graph, and discard
194                                                    any contorl flow that does not follow an existing edge in the base
195                                                    graph. For example, you can pass in a Function local transition
196                                                    graph as the base graph, and CFGEmulated will traverse nodes and
197                                                    edges and extract useful information.
198        :param int iropt_level:                     The optimization level of VEX IR (0, 1, 2). The default level will
199                                                    be used if `iropt_level` is None.
200        :param int max_steps:                       The maximum number of basic blocks to recover forthe longest path
201                                                    from each start before pausing the recovery procedure.
202        :param state_add_options:                   State options that will be added to the initial state.
203        :param state_remove_options:                State options that will be removed from the initial state.
204        """
205        ForwardAnalysis.__init__(self, order_jobs=True if base_graph is not None else False)
206        CFGBase.__init__(self, 'emulated', context_sensitivity_level, normalize=normalize,
207                         resolve_indirect_jumps=resolve_indirect_jumps,
208                         indirect_jump_resolvers=indirect_jump_resolvers,
209                         indirect_jump_target_limit=indirect_jump_target_limit,
210                         model=model,
211        )
212
213        if start is not None:
214            l.warning("`start` is deprecated. Please consider using `starts` instead in your code.")
215            self._starts = (start,)
216        else:
217            if isinstance(starts, (list, set)):
218                self._starts = tuple(starts)
219            elif isinstance(starts, tuple) or starts is None:
220                self._starts = starts
221            else:
222                raise AngrCFGError('Unsupported type of the `starts` argument.')
223
224        if enable_advanced_backward_slicing or enable_symbolic_back_traversal:
225            l.warning("`advanced backward slicing` and `symbolic back traversal` are deprecated.")
226            l.warning("Please use `resolve_indirect_jumps` to resolve indirect jumps using different resolvers instead.")
227
228        self._iropt_level = iropt_level
229        self._avoid_runs = avoid_runs
230        self._enable_function_hints = enable_function_hints
231        self._call_depth = call_depth
232        self._call_tracing_filter = call_tracing_filter
233        self._initial_state = initial_state
234        self._keep_state = keep_state
235        self._advanced_backward_slicing = enable_advanced_backward_slicing
236        self._enable_symbolic_back_traversal = enable_symbolic_back_traversal
237        self._additional_edges = additional_edges if additional_edges else {}
238        self._max_steps = max_steps
239        self._state_add_options = state_add_options if state_add_options is not None else set()
240        self._state_remove_options = state_remove_options if state_remove_options is not None else set()
241        self._state_add_options.update([o.SYMBOL_FILL_UNCONSTRAINED_MEMORY, o.SYMBOL_FILL_UNCONSTRAINED_REGISTERS])
242
243        # add the track_memory_option if the enable function hint flag is set
244        if self._enable_function_hints and o.TRACK_MEMORY_ACTIONS not in self._state_add_options:
245            self._state_add_options.add(o.TRACK_MEMORY_ACTIONS)
246
247        # more initialization
248
249        self._symbolic_function_initial_state = {}
250        self._function_input_states = None
251        self._unresolvable_runs = set()
252
253        # Stores the index for each CFGNode in this CFG after a quasi-topological sort (currently a DFS)
254        # TODO: remove it since it's no longer used
255        self._quasi_topological_order = {}
256        # A copy of all entry points in this CFG. Integers
257        self._entry_points = []
258
259        self._max_iterations = max_iterations
260        self._address_whitelist = set(address_whitelist) if address_whitelist is not None else None
261        self._base_graph = base_graph
262        self._node_addr_visiting_order = [ ]
263
264        if self._base_graph:
265            sorted_nodes = CFGUtils.quasi_topological_sort_nodes(self._base_graph)
266            self._node_addr_visiting_order = [ n.addr for n in sorted_nodes ]
267
268        self._sanitize_parameters()
269
270        self._executable_address_ranges = []
271        self._executable_address_ranges = self._executable_memory_regions()
272
273        # These save input states of functions. It will be discarded after the CFG is constructed
274        self._function_input_states = {}
275        self._loop_back_edges = []
276        self._overlapped_loop_headers = []
277        self._pending_function_hints = set()
278        # A dict to log edges and the jumpkind between each basic block
279        self._edge_map = defaultdict(list)
280
281        self._model._iropt_level = self._iropt_level
282
283        self._start_keys = [ ]  # a list of block IDs of all starts
284
285        # For each call, we are always getting two exits: an Ijk_Call that
286        # stands for the real call exit, and an Ijk_Ret that is a simulated exit
287        # for the retn address. There are certain cases that the control flow
288        # never returns to the next instruction of a callsite due to
289        # imprecision of the concrete execution. So we save those simulated
290        # exits here to increase our code coverage. Of course the real retn from
291        # that call always precedes those "fake" retns.
292        self._pending_jobs = defaultdict(list) # Dict[BlockID, List[PendingJob]]
293
294        # Counting how many times a basic block is traced into
295        self._traced_addrs = defaultdict(lambda: defaultdict(int))
296
297        # A dict that collects essential parameters to properly reconstruct initial state for a block
298        self._block_artifacts = {}
299        self._analyzed_addrs = set()
300        self._non_returning_functions = set()
301
302        self._pending_edges = defaultdict(list)
303
304        if not no_construct:
305            self._initialize_cfg()
306            self._analyze()
307
308    #
309    # Public methods
310    #
311
312    def copy(self):
313        """
314        Make a copy of the CFG.
315
316        :return: A copy of the CFG instance.
317        :rtype: angr.analyses.CFG
318        """
319        new_cfg = CFGEmulated.__new__(CFGEmulated)
320        super(CFGEmulated, self).make_copy(new_cfg)
321
322        new_cfg._indirect_jump_target_limit = self._indirect_jump_target_limit
323        new_cfg.named_errors = dict(self.named_errors)
324        new_cfg.errors = list(self.errors)
325        new_cfg._fail_fast = self._fail_fast
326        new_cfg._max_steps = self._max_steps
327        new_cfg.project = self.project
328
329        # Intelligently (or stupidly... you tell me) fill it up
330        new_cfg._edge_map = self._edge_map.copy()
331        new_cfg._loop_back_edges = self._loop_back_edges[::]
332        new_cfg._executable_address_ranges = self._executable_address_ranges[::]
333        new_cfg._unresolvable_runs = self._unresolvable_runs.copy()
334        new_cfg._overlapped_loop_headers = self._overlapped_loop_headers[::]
335        new_cfg._thumb_addrs = self._thumb_addrs.copy()
336        new_cfg._keep_state = self._keep_state
337
338        return new_cfg
339
340    def resume(self, starts=None, max_steps=None):
341        """
342        Resume a paused or terminated control flow graph recovery.
343
344        :param iterable starts: A collection of new starts to resume from. If `starts` is None, we will resume CFG
345                                recovery from where it was paused before.
346        :param int max_steps:   The maximum number of blocks on the longest path starting from each start before pausing
347                                the recovery.
348        :return: None
349        """
350
351        self._should_abort = False
352
353        self._starts = starts
354        self._max_steps = max_steps
355
356        if self._starts is None:
357            self._starts = [ ]
358
359        if self._starts:
360            self._sanitize_starts()
361
362        self._analyze()
363
364    def remove_cycles(self):
365        """
366        Forces graph to become acyclic, removes all loop back edges and edges between overlapped loop headers and their
367        successors.
368        """
369        # loop detection
370        # only detect loops after potential graph normalization
371        if not self._loop_back_edges:
372            l.debug("Detecting loops...")
373            self._detect_loops()
374
375        l.debug("Removing cycles...")
376        l.debug("There are %d loop back edges.", len(self._loop_back_edges))
377        l.debug("And there are %d overlapping loop headers.", len(self._overlapped_loop_headers))
378        # First break all detected loops
379        for b1, b2 in self._loop_back_edges:
380            if self._graph.has_edge(b1, b2):
381                l.debug("Removing loop back edge %s -> %s", b1, b2)
382                self._graph.remove_edge(b1, b2)
383        # Then remove all outedges from overlapped loop headers
384        for b in self._overlapped_loop_headers:
385            successors = self._graph.successors(b)
386            for succ in successors:
387                self._graph.remove_edge(b, succ)
388                l.debug("Removing partial loop header edge %s -> %s", b, succ)
389
390    def downsize(self):
391        """
392        Remove saved states from all CFGNodes to reduce memory usage.
393
394        :return: None
395        """
396
397        for cfg_node in self._nodes.values():
398            cfg_node.downsize()
399
400    def unroll_loops(self, max_loop_unrolling_times):
401        """
402        Unroll loops for each function. The resulting CFG may still contain loops due to recursion, function calls, etc.
403
404        :param int max_loop_unrolling_times: The maximum iterations of unrolling.
405        :return: None
406        """
407
408        if not isinstance(max_loop_unrolling_times, int) or \
409                        max_loop_unrolling_times < 0:
410            raise AngrCFGError('Max loop unrolling times must be set to an integer greater than or equal to 0 if ' +
411                               'loop unrolling is enabled.')
412
413        def _unroll(graph, loop):
414            """
415            The loop callback method where loops are unrolled.
416
417            :param networkx.DiGraph graph: The control flow graph.
418            :param angr.analyses.loopfinder.Loop loop: The loop instance.
419            :return: None
420            """
421
422            for back_edge in loop.continue_edges:
423                loop_body_addrs = {n.addr for n in loop.body_nodes}
424                src_blocknode = back_edge[0]  # type: angr.knowledge.codenode.BlockNode
425                dst_blocknode = back_edge[1]  # type: angr.knowledge.codenode.BlockNode
426
427                for src in self.get_all_nodes(src_blocknode.addr):
428                    for dst in graph.successors(src):
429                        if dst.addr != dst_blocknode.addr:
430                            continue
431
432                        # Duplicate the dst node
433                        new_dst = dst.copy()
434                        new_dst.looping_times = dst.looping_times + 1
435                        if (new_dst not in graph and
436                                # If the new_dst is already in the graph, we don't want to keep unrolling
437                                # the this loop anymore since it may *create* a new loop. Of course we
438                                # will lose some edges in this way, but in general it is acceptable.
439                                new_dst.looping_times <= max_loop_unrolling_times
440                            ):
441                            # Log all successors of the dst node
442                            dst_successors = graph.successors(dst)
443                            # Add new_dst to the graph
444                            edge_data = graph.get_edge_data(src, dst)
445                            graph.add_edge(src, new_dst, **edge_data)
446                            for ds in dst_successors:
447                                if ds.looping_times == 0 and ds.addr not in loop_body_addrs:
448                                    edge_data = graph.get_edge_data(dst, ds)
449                                    graph.add_edge(new_dst, ds, **edge_data)
450
451                        graph.remove_edge(src, dst)
452
453        self._detect_loops(loop_callback=_unroll)
454
455    def force_unroll_loops(self, max_loop_unrolling_times):
456        """
457        Unroll loops globally. The resulting CFG does not contain any loop, but this method is slow on large graphs.
458
459        :param int max_loop_unrolling_times: The maximum iterations of unrolling.
460        :return: None
461        """
462
463        if not isinstance(max_loop_unrolling_times, int) or \
464                        max_loop_unrolling_times < 0:
465            raise AngrCFGError('Max loop unrolling times must be set to an integer greater than or equal to 0 if ' +
466                               'loop unrolling is enabled.')
467
468        # Traverse the CFG and try to find the beginning of loops
469        loop_backedges = []
470
471        start = self._starts[0]
472        if isinstance(start, tuple):
473            start, _ = start  # pylint: disable=unpacking-non-sequence
474        start_node = self.get_any_node(start)
475        if start_node is None:
476            raise AngrCFGError('Cannot find start node when trying to unroll loops. The CFG might be empty.')
477
478        graph_copy = networkx.DiGraph(self.graph)
479
480        while True:
481            cycles_iter = networkx.simple_cycles(graph_copy)
482            try:
483                cycle = next(cycles_iter)
484            except StopIteration:
485                break
486
487            loop_backedge = (None, None)
488
489            for n in networkx.dfs_preorder_nodes(graph_copy, source=start_node):
490                if n in cycle:
491                    idx = cycle.index(n)
492                    if idx == 0:
493                        loop_backedge = (cycle[-1], cycle[idx])
494                    else:
495                        loop_backedge = (cycle[idx - 1], cycle[idx])
496                    break
497
498            if loop_backedge not in loop_backedges:
499                loop_backedges.append(loop_backedge)
500
501            # Create a common end node for all nodes whose out_degree is 0
502            end_nodes = [n for n in graph_copy.nodes() if graph_copy.out_degree(n) == 0]
503            new_end_node = "end_node"
504
505            if not end_nodes:
506                # We gotta randomly break a loop
507                cycles = sorted(networkx.simple_cycles(graph_copy), key=len)
508                first_cycle = cycles[0]
509                if len(first_cycle) == 1:
510                    graph_copy.remove_edge(first_cycle[0], first_cycle[0])
511                else:
512                    graph_copy.remove_edge(first_cycle[0], first_cycle[1])
513                end_nodes = [n for n in graph_copy.nodes() if graph_copy.out_degree(n) == 0]
514
515            for en in end_nodes:
516                graph_copy.add_edge(en, new_end_node)
517
518            # postdoms = self.immediate_postdominators(new_end_node, target_graph=graph_copy)
519            # reverse_postdoms = defaultdict(list)
520            # for k, v in postdoms.items():
521            #    reverse_postdoms[v].append(k)
522
523            # Find all loop bodies
524            # for src, dst in loop_backedges:
525            #    nodes_in_loop = { src, dst }
526
527            #    while True:
528            #        new_nodes = set()
529
530            #        for n in nodes_in_loop:
531            #            if n in reverse_postdoms:
532            #                for node in reverse_postdoms[n]:
533            #                    if node not in nodes_in_loop:
534            #                        new_nodes.add(node)
535
536            #        if not new_nodes:
537            #            break
538
539            #        nodes_in_loop |= new_nodes
540
541            # Unroll the loop body
542            # TODO: Finish the implementation
543
544            graph_copy.remove_node(new_end_node)
545            src, dst = loop_backedge
546            if graph_copy.has_edge(src, dst):  # It might have been removed before
547                # Duplicate the dst node
548                new_dst = dst.copy()
549                new_dst.looping_times = dst.looping_times + 1
550                if (
551                        new_dst not in graph_copy and
552                        # If the new_dst is already in the graph, we don't want to keep unrolling
553                        # the this loop anymore since it may *create* a new loop. Of course we
554                        # will lose some edges in this way, but in general it is acceptable.
555                        new_dst.looping_times <= max_loop_unrolling_times):
556                    # Log all successors of the dst node
557                    dst_successors = list(graph_copy.successors(dst))
558                    # Add new_dst to the graph
559                    edge_data = graph_copy.get_edge_data(src, dst)
560                    graph_copy.add_edge(src, new_dst, **edge_data)
561                    for ds in dst_successors:
562                        if ds.looping_times == 0 and ds not in cycle:
563                            edge_data = graph_copy.get_edge_data(dst, ds)
564                            graph_copy.add_edge(new_dst, ds, **edge_data)
565                # Remove the original edge
566                graph_copy.remove_edge(src, dst)
567
568        # Update loop backedges
569        self._loop_back_edges = loop_backedges
570
571        self.model.graph = graph_copy
572
573    def immediate_dominators(self, start, target_graph=None):
574        """
575        Get all immediate dominators of sub graph from given node upwards.
576
577        :param str start: id of the node to navigate forwards from.
578        :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph.
579
580        :return: each node of graph as index values, with element as respective node's immediate dominator.
581        :rtype: dict
582        """
583        return self._immediate_dominators(start, target_graph=target_graph, reverse_graph=False)
584
585    def immediate_postdominators(self, end, target_graph=None):
586        """
587        Get all immediate postdominators of sub graph from given node upwards.
588
589        :param str start: id of the node to navigate forwards from.
590        :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph.
591
592        :return: each node of graph as index values, with element as respective node's immediate dominator.
593        :rtype: dict
594        """
595        return self._immediate_dominators(end, target_graph=target_graph, reverse_graph=True)
596
597    def remove_fakerets(self):
598        """
599        Get rid of fake returns (i.e., Ijk_FakeRet edges) from this CFG
600
601        :return: None
602        """
603        fakeret_edges = [ (src, dst) for src, dst, data in self.graph.edges(data=True)
604                         if data['jumpkind'] == 'Ijk_FakeRet' ]
605        self.graph.remove_edges_from(fakeret_edges)
606
607    def get_topological_order(self, cfg_node):
608        """
609        Get the topological order of a CFG Node.
610
611        :param cfg_node: A CFGNode instance.
612        :return: An integer representing its order, or None if the CFGNode does not exist in the graph.
613        """
614
615        if not self._quasi_topological_order:
616            self._quasi_topological_sort()
617
618        return self._quasi_topological_order.get(cfg_node, None)
619
620    def get_subgraph(self, starting_node, block_addresses):
621        """
622        Get a sub-graph out of a bunch of basic block addresses.
623
624        :param CFGNode starting_node: The beginning of the subgraph
625        :param iterable block_addresses: A collection of block addresses that should be included in the subgraph if
626                                        there is a path between `starting_node` and a CFGNode with the specified
627                                        address, and all nodes on the path should also be included in the subgraph.
628        :return: A new CFG that only contain the specific subgraph.
629        :rtype: CFGEmulated
630        """
631
632        graph = networkx.DiGraph()
633
634        if starting_node not in self.graph:
635            raise AngrCFGError('get_subgraph(): the specified "starting_node" %s does not exist in the current CFG.'
636                               % starting_node
637                               )
638
639        addr_set = set(block_addresses)
640
641        graph.add_node(starting_node)
642        queue = [ starting_node ]
643
644        while queue:
645            node = queue.pop()
646            for _, dst, data in self.graph.out_edges([node], data=True):
647                if dst not in graph and dst.addr in addr_set:
648                    graph.add_edge(node, dst, **data)
649                    queue.append(dst)
650
651        cfg = self.copy()
652        cfg._graph = graph
653        cfg._starts = (starting_node.addr, )
654
655        return cfg
656
657    def get_function_subgraph(self, start, max_call_depth=None):
658        """
659        Get a sub-graph of a certain function.
660
661        :param start: The function start. Currently it should be an integer.
662        :param max_call_depth: Call depth limit. None indicates no limit.
663        :return: A CFG instance which is a sub-graph of self.graph
664        """
665
666        # FIXME: syscalls are not supported
667        # FIXME: start should also take a CFGNode instance
668
669        start_node = self.get_any_node(start)
670
671        node_wrapper = (start_node, 0)
672        stack = [node_wrapper]
673        traversed_nodes = {start_node}
674        subgraph_nodes = set([start_node])
675
676        while stack:
677            nw = stack.pop()
678            n, call_depth = nw[0], nw[1]
679
680            # Get successors
681            edges = self.graph.out_edges(n, data=True)
682
683            for _, dst, data in edges:
684                if dst not in traversed_nodes:
685                    # We see a new node!
686                    traversed_nodes.add(dst)
687
688                    if data['jumpkind'] == 'Ijk_Call':
689                        if max_call_depth is None or (max_call_depth is not None and call_depth < max_call_depth):
690                            subgraph_nodes.add(dst)
691                            new_nw = (dst, call_depth + 1)
692                            stack.append(new_nw)
693                    elif data['jumpkind'] == 'Ijk_Ret':
694                        if call_depth > 0:
695                            subgraph_nodes.add(dst)
696                            new_nw = (dst, call_depth - 1)
697                            stack.append(new_nw)
698                    else:
699                        subgraph_nodes.add(dst)
700                        new_nw = (dst, call_depth)
701                        stack.append(new_nw)
702
703       #subgraph = networkx.subgraph(self.graph, subgraph_nodes)
704        subgraph = self.graph.subgraph(subgraph_nodes).copy()
705
706        # Make it a CFG instance
707        subcfg = self.copy()
708        subcfg._graph = subgraph
709        subcfg._starts = (start,)
710
711        return subcfg
712
713    @property
714    def context_sensitivity_level(self):
715        return self._context_sensitivity_level
716
717    #
718    # Serialization
719    #
720
721    def __setstate__(self, s):
722        self.project = s['project']
723        self.indirect_jumps: Dict[int,IndirectJump] = s['indirect_jumps']
724        self._loop_back_edges = s['_loop_back_edges']
725        self._thumb_addrs = s['_thumb_addrs']
726        self._unresolvable_runs = s['_unresolvable_runs']
727        self._executable_address_ranges = s['_executable_address_ranges']
728        self._iropt_level = s['_iropt_level']
729        self._model = s['_model']
730
731    def __getstate__(self):
732        s = {
733            'project': self.project,
734            "indirect_jumps": self.indirect_jumps,
735            '_loop_back_edges': self._loop_back_edges,
736            '_nodes_by_addr': self._nodes_by_addr,
737            '_thumb_addrs': self._thumb_addrs,
738            '_unresolvable_runs': self._unresolvable_runs,
739            '_executable_address_ranges': self._executable_address_ranges,
740            '_iropt_level': self._iropt_level,
741            '_model': self._model
742        }
743
744        return s
745
746    #
747    # Properties
748    #
749
750    @property
751    def graph(self):
752        return self._model.graph
753
754    @property
755    def unresolvables(self):
756        """
757        Get those SimRuns that have non-resolvable exits.
758
759        :return:    A set of SimRuns
760        :rtype:     set
761        """
762        return self._unresolvable_runs
763
764    @property
765    def deadends(self):
766        """
767        Get all CFGNodes that has an out-degree of 0
768
769        :return: A list of CFGNode instances
770        :rtype:  list
771        """
772        if self.graph is None:
773            raise AngrCFGError('CFG hasn\'t been generated yet.')
774
775        deadends = [i for i in self.graph if self.graph.out_degree(i) == 0]
776
777        return deadends
778
779    #
780    # Private methods
781    #
782
783    # Initialization related methods
784
785    def _sanitize_parameters(self):
786        """
787        Perform a sanity check on parameters passed in to CFG.__init__().
788        An AngrCFGError is raised if any parameter fails the sanity check.
789
790        :return: None
791        """
792
793        # Check additional_edges
794        if isinstance(self._additional_edges, (list, set, tuple)):
795            new_dict = defaultdict(list)
796            for s, d in self._additional_edges:
797                new_dict[s].append(d)
798            self._additional_edges = new_dict
799
800        elif isinstance(self._additional_edges, dict):
801            pass
802
803        else:
804            raise AngrCFGError('Additional edges can only be a list, set, tuple, or a dict.')
805
806        # Check _advanced_backward_slicing
807        if self._advanced_backward_slicing and self._enable_symbolic_back_traversal:
808            raise AngrCFGError('Advanced backward slicing and symbolic back traversal cannot both be enabled.')
809
810        if self._advanced_backward_slicing and not self._keep_state:
811            raise AngrCFGError('Keep state must be enabled if advanced backward slicing is enabled.')
812
813        # Sanitize avoid_runs
814        self._avoid_runs = [ ] if self._avoid_runs is None else self._avoid_runs
815        if not isinstance(self._avoid_runs, (list, set)):
816            raise AngrCFGError('"avoid_runs" must either be None, or a list or a set.')
817
818        self._sanitize_starts()
819
820    def _sanitize_starts(self):
821        # Sanitize starts
822        # Convert self._starts to a list of SimState instances or tuples of (ip, jumpkind)
823        if self._starts is None:
824            self._starts = ((self.project.entry, None),)
825
826        else:
827            new_starts = [ ]
828            for item in self._starts:
829                if isinstance(item, tuple):
830                    if len(item) != 2:
831                        raise AngrCFGError('Unsupported item in "starts": %s' % str(item))
832
833                    new_starts.append(item)
834                elif isinstance(item, int):
835                    new_starts.append((item, None))
836
837                elif isinstance(item, SimState):
838                    new_starts.append(item)
839
840                else:
841                    raise AngrCFGError('Unsupported item type in "starts": %s' % type(item))
842
843            self._starts = new_starts
844
845        if not self._starts:
846            raise AngrCFGError("At least one start must be provided")
847
848    # CFG construction
849    # The main loop and sub-methods
850
851    def _job_key(self, job):
852        """
853        Get the key for a specific CFG job. The key is a context-sensitive block ID.
854
855        :param CFGJob job: The CFGJob instance.
856        :return:             The block ID of the specific CFG job.
857        :rtype:              BlockID
858        """
859
860        return job.block_id
861
862    def _job_sorting_key(self, job):
863        """
864        Get the sorting key of a CFGJob instance.
865
866        :param CFGJob job: the CFGJob object.
867        :return: An integer that determines the order of this job in the queue.
868        :rtype: int
869        """
870
871        if self._base_graph is None:
872            # we don't do sorting if there is no base_graph
873            return 0
874
875        MAX_JOBS = 1000000
876
877        if job.addr not in self._node_addr_visiting_order:
878            return MAX_JOBS
879
880        return self._node_addr_visiting_order.index(job.addr)
881
882    def _pre_analysis(self):
883        """
884        Initialization work. Executed prior to the analysis.
885
886        :return: None
887        """
888
889        # Fill up self._starts
890        for item in self._starts:
891            callstack = None
892            if isinstance(item, tuple):
893                # (addr, jumpkind)
894                ip = item[0]
895                state = self._create_initial_state(item[0], item[1])
896
897            elif isinstance(item, SimState):
898                # SimState
899                state = item.copy()  # pylint: disable=no-member
900                ip = state.solver.eval_one(state.ip)
901                self._reset_state_mode(state, 'fastpath')
902
903            else:
904                raise AngrCFGError('Unsupported CFG start type: %s.' % str(type(item)))
905
906            self._symbolic_function_initial_state[ip] = state
907            path_wrapper = CFGJob(ip, state, self._context_sensitivity_level, None, None, call_stack=callstack)
908            key = path_wrapper.block_id
909            if key not in self._start_keys:
910                self._start_keys.append(key)
911
912            self._insert_job(path_wrapper)
913            self._register_analysis_job(path_wrapper.func_addr, path_wrapper)
914
915    def _intra_analysis(self):
916        """
917        During the analysis. We process function hints here.
918
919        :return: None
920        """
921
922        # TODO: Why was the former two conditions there in the first place?
923        if self._pending_function_hints:
924            self._process_hints(self._analyzed_addrs)
925
926    def _job_queue_empty(self):
927        """
928        A callback method called when the job queue is empty.
929
930        :return: None
931        """
932
933        self._iteratively_clean_pending_exits()
934
935        while self._pending_jobs:
936            # We don't have any exits remaining. Let's pop out a pending exit
937            pending_job = self._get_one_pending_job()
938            if pending_job is None:
939                continue
940
941            self._insert_job(pending_job)
942            self._register_analysis_job(pending_job.func_addr, pending_job)
943            break
944
945    def _create_initial_state(self, ip, jumpkind):
946        """
947        Obtain a SimState object for a specific address
948
949        Fastpath means the CFG generation will work in an IDA-like way, in which it will not try to execute every
950        single statement in the emulator, but will just do the decoding job. This is much faster than the old way.
951
952        :param int ip: The instruction pointer
953        :param str jumpkind: The jumpkind upon executing the block
954        :return: The newly-generated state
955        :rtype: SimState
956        """
957
958        jumpkind = "Ijk_Boring" if jumpkind is None else jumpkind
959
960        if self._initial_state is None:
961            state = self.project.factory.blank_state(addr=ip, mode="fastpath",
962                                                     add_options=self._state_add_options,
963                                                     remove_options=self._state_remove_options,
964                                                     )
965            self._initial_state = state
966        else:
967            # FIXME: self._initial_state is deprecated. This branch will be removed soon
968            state = self._initial_state
969            state.history.jumpkind = jumpkind
970            self._reset_state_mode(state, 'fastpath')
971            state._ip = state.solver.BVV(ip, self.project.arch.bits)
972
973        if jumpkind is not None:
974            state.history.jumpkind = jumpkind
975
976        # THIS IS A HACK FOR MIPS
977        if ip is not None and self.project.arch.name in ('MIPS32', 'MIPS64'):
978            # We assume this is a function start
979            state.regs.t9 = ip
980        # TODO there was at one point special logic for the ppc64 table of contents but it seems to have bitrotted
981
982        return state
983
984    def _get_one_pending_job(self):
985        """
986        Retrieve a pending job.
987
988        :return: A CFGJob instance or None
989        """
990
991        pending_job_key = next(iter(self._pending_jobs.keys()))
992        pending_job = self._pending_jobs[pending_job_key].pop()
993        if len(self._pending_jobs[pending_job_key]) == 0:
994            del self._pending_jobs[pending_job_key]
995
996        pending_job_state = pending_job.state
997        pending_job_call_stack = pending_job.call_stack
998        pending_job_src_block_id = pending_job.src_block_id
999        pending_job_src_exit_stmt_idx = pending_job.src_exit_stmt_idx
1000
1001        self._deregister_analysis_job(pending_job.caller_func_addr, pending_job)
1002
1003        # Let's check whether this address has been traced before.
1004        if pending_job_key in self._nodes:
1005            node = self._nodes[pending_job_key]
1006            if node in self.graph:
1007                pending_exit_addr = self._block_id_addr(pending_job_key)
1008                # That block has been traced before. Let's forget about it
1009                l.debug("Target 0x%08x has been traced before. Trying the next one...", pending_exit_addr)
1010
1011                # However, we should still create the FakeRet edge
1012                self._graph_add_edge(pending_job_src_block_id, pending_job_key, jumpkind="Ijk_FakeRet",
1013                                     stmt_idx=pending_job_src_exit_stmt_idx, ins_addr=pending_job.src_exit_ins_addr)
1014
1015                return None
1016
1017        pending_job_state.history.jumpkind = 'Ijk_FakeRet'
1018
1019        job = CFGJob(pending_job_state.addr,
1020                     pending_job_state,
1021                     self._context_sensitivity_level,
1022                     src_block_id=pending_job_src_block_id,
1023                     src_exit_stmt_idx=pending_job_src_exit_stmt_idx,
1024                     src_ins_addr=pending_job.src_exit_ins_addr,
1025                     call_stack=pending_job_call_stack,
1026        )
1027        l.debug("Tracing a missing return exit %s", self._block_id_repr(pending_job_key))
1028
1029        return job
1030
1031    def _process_hints(self, analyzed_addrs):
1032        """
1033        Process function hints in the binary.
1034
1035        :return: None
1036        """
1037
1038        # Function hints!
1039        # Now let's see how many new functions we can get here...
1040        while self._pending_function_hints:
1041            f = self._pending_function_hints.pop()
1042            if f not in analyzed_addrs:
1043                new_state = self.project.factory.entry_state(mode='fastpath')
1044                new_state.ip = new_state.solver.BVV(f, self.project.arch.bits)
1045
1046                # TOOD: Specially for MIPS
1047                if new_state.arch.name in ('MIPS32', 'MIPS64'):
1048                    # Properly set t9
1049                    new_state.registers.store('t9', f)
1050
1051                new_path_wrapper = CFGJob(f,
1052                                          new_state,
1053                                          self._context_sensitivity_level
1054                                          )
1055                self._insert_job(new_path_wrapper)
1056                self._register_analysis_job(f, new_path_wrapper)
1057                l.debug('Picking a function 0x%x from pending function hints.', f)
1058                self.kb.functions.function(new_path_wrapper.func_addr, create=True)
1059                break
1060
1061    def _post_analysis(self):
1062        """
1063        Post-CFG-construction.
1064
1065        :return: None
1066        """
1067
1068        self._make_completed_functions()
1069        new_changes = self._iteratively_analyze_function_features()
1070        functions_do_not_return = new_changes['functions_do_not_return']
1071        self._update_function_callsites(functions_do_not_return)
1072
1073        # Create all pending edges
1074        for _, edges in self._pending_edges.items():
1075            for src_node, dst_node, data in edges:
1076                self._graph_add_edge(src_node, dst_node, **data)
1077
1078        # Remove those edges that will never be taken!
1079        self._remove_non_return_edges()
1080
1081        CFGBase._post_analysis(self)
1082
1083    # Job handling
1084
1085    def _pre_job_handling(self, job):  # pylint:disable=arguments-differ
1086        """
1087        Before processing a CFGJob.
1088        Right now each block is traced at most once. If it is traced more than once, we will mark it as "should_skip"
1089        before tracing it.
1090        An AngrForwardAnalysisSkipJob exception is raised in order to skip analyzing the job.
1091
1092        :param CFGJob job: The CFG job object.
1093        :param dict _locals: A bunch of local variables that will be kept around when handling this job and its
1094                        corresponding successors.
1095        :return: None
1096        """
1097
1098        # Extract initial info the CFGJob
1099        job.call_stack_suffix = job.get_call_stack_suffix()
1100        job.current_function = self.kb.functions.function(job.func_addr, create=True,
1101                                                          syscall=job.jumpkind.startswith("Ijk_Sys"))
1102        src_block_id = job.src_block_id
1103        src_exit_stmt_idx = job.src_exit_stmt_idx
1104        src_ins_addr = job.src_ins_addr
1105        addr = job.addr
1106
1107        # Log this address
1108        if l.level == logging.DEBUG:
1109            self._analyzed_addrs.add(addr)
1110
1111        if addr == job.func_addr:
1112            # Store the input state of this function
1113            self._function_input_states[job.func_addr] = job.state
1114
1115        # deregister this job
1116        self._deregister_analysis_job(job.func_addr, job)
1117
1118        # Generate a unique key for this job
1119        block_id = job.block_id
1120
1121        # Should we skip tracing this block?
1122        should_skip = False
1123        if self._traced_addrs[job.call_stack_suffix][addr] >= self._max_iterations:
1124            should_skip = True
1125        elif self._is_call_jumpkind(job.jumpkind) and \
1126             self._call_depth is not None and \
1127             len(job.call_stack) > self._call_depth and \
1128             (self._call_tracing_filter is None or self._call_tracing_filter(job.state, job.jumpkind)):
1129            should_skip = True
1130
1131        # SimInspect breakpoints support
1132        job.state._inspect('cfg_handle_job', BP_BEFORE)
1133
1134        # Get a SimSuccessors out of current job
1135        sim_successors, exception_info, _ = self._get_simsuccessors(addr, job, current_function_addr=job.func_addr)
1136
1137        # determine the depth of this basic block
1138        if self._max_steps is None:
1139            # it's unnecessary to track depth when we are not limiting max_steps
1140            depth = None
1141        else:
1142            if src_block_id is None:
1143                # oh this is the very first basic block on this path
1144                depth = 0
1145            else:
1146                src_cfgnode = self._nodes[src_block_id]
1147                depth = src_cfgnode.depth + 1
1148                # the depth will not be updated later on even if this block has a greater depth on another path.
1149                # consequently, the `max_steps` limit is not veyr precise - I didn't see a need to make it precise
1150                # though.
1151
1152        if block_id not in self._nodes:
1153            # Create the CFGNode object
1154            cfg_node = self._create_cfgnode(sim_successors, job.call_stack, job.func_addr, block_id=block_id,
1155                                            depth=depth
1156                                            )
1157
1158            self._nodes[block_id] = cfg_node
1159            self._nodes_by_addr[cfg_node.addr].append(cfg_node)
1160
1161        else:
1162            # each block_id should only correspond to one CFGNode
1163            # use the existing CFGNode object
1164            # note that since we reuse existing CFGNodes, we may miss the following cases:
1165            #
1166            # mov eax, label_0       mov ebx, label_1
1167            #     jmp xxx                 jmp xxx
1168            #         |                      |
1169            #         |                      |
1170            #         *----------------------*
1171            #                     |
1172            #                 call eax
1173            #
1174            # Since the basic block "call eax" will only be traced once, either label_0 or label_1 will be missed in
1175            # this case. Indirect jump resolution might be able to get it, but that's another story. Ideally,
1176            # "call eax" should be traced twice with *different* sim_successors keys, which requires block ID being flow
1177            # sensitive, but it is way too expensive.
1178
1179            cfg_node = self._nodes[block_id]
1180
1181        # Increment tracing count for this block
1182        self._traced_addrs[job.call_stack_suffix][addr] += 1
1183
1184        if self._keep_state:
1185            # TODO: if we are reusing an existing CFGNode, we will be overwriting the original input state here. we
1186            # TODO: should save them all, which, unfortunately, requires some redesigning :-(
1187            cfg_node.input_state = sim_successors.initial_state
1188
1189        # See if this job cancels another FakeRet
1190        # This should be done regardless of whether this job should be skipped or not, otherwise edges will go missing
1191        # in the CFG or function transition graphs.
1192        if job.jumpkind == 'Ijk_FakeRet' or \
1193                (job.jumpkind == 'Ijk_Ret' and block_id in self._pending_jobs):
1194            # The fake ret is confirmed (since we are returning from the function it calls). Create an edge for it
1195            # in the graph.
1196
1197            the_jobs = [ ]
1198            if block_id in self._pending_jobs:
1199                the_jobs = self._pending_jobs.pop(block_id)
1200                for the_job in the_jobs:  # type: Pendingjob
1201                    self._deregister_analysis_job(the_job.caller_func_addr, the_job)
1202            else:
1203                the_jobs = [job]
1204
1205            for the_job in the_jobs:
1206                self._graph_add_edge(the_job.src_block_id, block_id,
1207                                     jumpkind='Ijk_FakeRet',
1208                                     stmt_idx=the_job.src_exit_stmt_idx,
1209                                     ins_addr=src_ins_addr
1210                                     )
1211                self._update_function_transition_graph(the_job.src_block_id, block_id,
1212                                                       jumpkind='Ijk_FakeRet',
1213                                                       ins_addr=src_ins_addr,
1214                                                       stmt_idx=the_job.src_exit_stmt_idx,
1215                                                       confirmed=True
1216                                                       )
1217
1218        if sim_successors is None or should_skip:
1219            # We cannot retrieve the block, or we should skip the analysis of this node
1220            # But we create the edge anyway. If the sim_successors does not exist, it will be an edge from the previous
1221            # node to a PathTerminator
1222            self._graph_add_edge(src_block_id, block_id,
1223                                 jumpkind=job.jumpkind,
1224                                 stmt_idx=src_exit_stmt_idx,
1225                                 ins_addr=src_ins_addr
1226                                 )
1227            self._update_function_transition_graph(src_block_id, block_id,
1228                                                   jumpkind=job.jumpkind,
1229                                                   ins_addr=src_ins_addr,
1230                                                   stmt_idx=src_exit_stmt_idx)
1231
1232            # We are good. Raise the exception and leave
1233            raise AngrSkipJobNotice()
1234
1235        self._update_thumb_addrs(sim_successors, job.state)
1236
1237        # We store the function hints first. Function hints will be checked at the end of the analysis to avoid
1238        # any duplication with existing jumping targets
1239        if self._enable_function_hints:
1240            if sim_successors.sort == 'IRSB' and sim_successors.all_successors:
1241                function_hints = self._search_for_function_hints(sim_successors.all_successors[0])
1242                for f in function_hints:
1243                    self._pending_function_hints.add(f)
1244
1245        self._graph_add_edge(src_block_id, block_id, jumpkind=job.jumpkind, stmt_idx=src_exit_stmt_idx,
1246                             ins_addr=src_ins_addr)
1247        self._update_function_transition_graph(src_block_id, block_id, jumpkind=job.jumpkind,
1248                                               ins_addr=src_ins_addr, stmt_idx=src_exit_stmt_idx)
1249
1250        if block_id in self._pending_edges:
1251            # there are some edges waiting to be created. do it here.
1252            for src_key, dst_key, data in self._pending_edges[block_id]:
1253                self._graph_add_edge(src_key, dst_key, **data)
1254            del self._pending_edges[block_id]
1255
1256        block_info = {reg: sim_successors.initial_state.registers.load(reg) for reg in self.project.arch.persistent_regs}
1257        self._block_artifacts[addr] = block_info
1258
1259        job.cfg_node = cfg_node
1260        job.sim_successors = sim_successors
1261        job.exception_info = exception_info
1262
1263        # For debugging purposes!
1264        job.successor_status = {}
1265
1266    def _get_successors(self, job):
1267        """
1268        Get a collection of successors out of the current job.
1269
1270        :param CFGJob job:  The CFGJob instance.
1271        :return:            A collection of successors.
1272        :rtype:             list
1273        """
1274
1275        addr = job.addr
1276        sim_successors = job.sim_successors
1277        cfg_node = job.cfg_node
1278        input_state = job.state
1279        func_addr = job.func_addr
1280
1281        # check step limit
1282        if self._max_steps is not None:
1283            depth = cfg_node.depth
1284            if depth >= self._max_steps:
1285                return [ ]
1286
1287        successors = [ ]
1288        is_indirect_jump = sim_successors.sort == 'IRSB' and self._is_indirect_jump(cfg_node, sim_successors)
1289        indirect_jump_resolved_by_resolvers = False
1290
1291        if is_indirect_jump and self._resolve_indirect_jumps:
1292            # Try to resolve indirect jumps
1293            irsb = input_state.block(cross_insn_opt=False).vex
1294
1295            resolved, resolved_targets, ij = self._indirect_jump_encountered(addr, cfg_node, irsb, func_addr,
1296                                                                             stmt_idx=DEFAULT_STATEMENT)
1297            if resolved:
1298                successors = self._convert_indirect_jump_targets_to_states(job, resolved_targets)
1299                if ij:
1300                    self._indirect_jump_resolved(ij, ij.addr, None, resolved_targets)
1301            else:
1302                # Try to resolve this indirect jump using heavier approaches
1303                resolved_targets = self._process_one_indirect_jump(ij)
1304                successors = self._convert_indirect_jump_targets_to_states(job, resolved_targets)
1305
1306            if successors:
1307                indirect_jump_resolved_by_resolvers = True
1308            else:
1309                # It's unresolved. Add it to the wait list (but apparently we don't have any better way to resolve it
1310                # right now).
1311                self._indirect_jumps_to_resolve.add(ij)
1312
1313        if not successors:
1314            # Get all successors of this block
1315            successors = (sim_successors.flat_successors + sim_successors.unsat_successors) \
1316                if addr not in self._avoid_runs else []
1317
1318        # Post-process successors
1319        successors, job.extra_info = self._post_process_successors(input_state, sim_successors, successors)
1320
1321        all_successors = successors + sim_successors.unconstrained_successors
1322
1323        # make sure FakeRets are at the last
1324        all_successors = [ suc for suc in all_successors if suc.history.jumpkind != 'Ijk_FakeRet' ] + \
1325                         [ suc for suc in all_successors if suc.history.jumpkind == 'Ijk_FakeRet' ]
1326
1327        if self._keep_state:
1328            cfg_node.final_states = all_successors[::]
1329
1330        if is_indirect_jump and not indirect_jump_resolved_by_resolvers:
1331            # For indirect jumps, filter successors that do not make sense
1332            successors = self._filter_insane_successors(successors)
1333
1334        successors = self._try_resolving_indirect_jumps(sim_successors,
1335                                                        cfg_node,
1336                                                        func_addr,
1337                                                        successors,
1338                                                        job.exception_info,
1339                                                        self._block_artifacts)
1340        # Remove all successors whose IP is symbolic
1341        successors = [ s for s in successors if not s.ip.symbolic ]
1342
1343        # Add additional edges supplied by the user
1344        successors = self._add_additional_edges(input_state, sim_successors, cfg_node, successors)
1345
1346        # if base graph is used, add successors implied from the graph
1347        if self._base_graph:
1348            basegraph_successor_addrs = set()
1349            for src_, dst_ in self._base_graph.edges():
1350                if src_.addr == addr:
1351                    basegraph_successor_addrs.add(dst_.addr)
1352            successor_addrs = {s.solver.eval(s.ip) for s in successors}
1353            extra_successor_addrs = basegraph_successor_addrs - successor_addrs
1354
1355            if all_successors:  # make sure we have a base state to use
1356                base_state = all_successors[0]  # TODO: for calls, we want to use the fake_ret state
1357
1358                for s_addr in extra_successor_addrs:
1359                    # an extra target
1360                    successor_state = base_state.copy()
1361                    successor_state.ip = s_addr
1362                    successors.append(successor_state)
1363            else:
1364                if extra_successor_addrs:
1365                    l.error('CFGEmulated terminates at %#x although base graph provided more exits.', addr)
1366
1367        if not successors:
1368            # There is no way out :-(
1369            # Log it first
1370            self._push_unresolvable_run(addr)
1371
1372            if sim_successors.sort == 'SimProcedure' and isinstance(sim_successors.artifacts['procedure'],
1373                    SIM_PROCEDURES["stubs"]["PathTerminator"]):
1374                # If there is no valid exit in this branch and it's not
1375                # intentional (e.g. caused by a SimProcedure that does not
1376                # do_return) , we should make it return to its call-site. However,
1377                # we don't want to use its state anymore as it might be corrupted.
1378                # Just create an edge in the graph.
1379                return_target = job.call_stack.current_return_target
1380                if return_target is not None:
1381                    new_call_stack = job.call_stack_copy()
1382                    return_target_key = self._generate_block_id(
1383                        new_call_stack.stack_suffix(self.context_sensitivity_level),
1384                        return_target,
1385                        False
1386                    )  # You can never return to a syscall
1387
1388                    if not cfg_node.instruction_addrs:
1389                        ret_ins_addr = None
1390                    else:
1391                        if self.project.arch.branch_delay_slot:
1392                            if len(cfg_node.instruction_addrs) > 1:
1393                                ret_ins_addr = cfg_node.instruction_addrs[-2]
1394                            else:
1395                                l.error('At %s: expecting more than one instruction. Only got one.', cfg_node)
1396                                ret_ins_addr = None
1397                        else:
1398                            ret_ins_addr = cfg_node.instruction_addrs[-1]
1399
1400                    # Things might be a bit difficult here. _graph_add_edge() requires both nodes to exist, but here
1401                    # the return target node may not exist yet. If that's the case, we will put it into a "delayed edge
1402                    # list", and add this edge later when the return target CFGNode is created.
1403                    if return_target_key in self._nodes:
1404                        self._graph_add_edge(job.block_id, return_target_key, jumpkind='Ijk_Ret',
1405                                             stmt_id=DEFAULT_STATEMENT, ins_addr=ret_ins_addr)
1406                    else:
1407                        self._pending_edges[return_target_key].append((job.block_id, return_target_key,
1408                                                                       {
1409                                                                           'jumpkind': 'Ijk_Ret',
1410                                                                           'stmt_id': DEFAULT_STATEMENT,
1411                                                                           'ins_addr': ret_ins_addr,
1412                                                                        }
1413                                                                       )
1414                                                                      )
1415
1416            else:
1417                # There are no successors, but we still want to update the function graph
1418                artifacts = job.sim_successors.artifacts
1419                if 'irsb' in artifacts and 'insn_addrs' in artifacts and artifacts['insn_addrs']:
1420                    the_irsb = artifacts['irsb']
1421                    insn_addrs = artifacts['insn_addrs']
1422                    self._handle_job_without_successors(job, the_irsb, insn_addrs)
1423
1424        # TODO: replace it with a DDG-based function IO analysis
1425        # handle all actions
1426        if successors:
1427            self._handle_actions(successors[0],
1428                                 sim_successors,
1429                                 job.current_function,
1430                                 job.current_stack_pointer,
1431                                 set(),
1432                                 )
1433
1434        return successors
1435
1436    def _post_job_handling(self, job, new_jobs, successors):
1437        """
1438
1439        :param CFGJob job:
1440        :param successors:
1441        :return:
1442        """
1443
1444        # Finally, post-process CFG Node and log the return target
1445        if job.extra_info:
1446            if job.extra_info['is_call_jump'] and job.extra_info['return_target'] is not None:
1447                job.cfg_node.return_target = job.extra_info['return_target']
1448
1449        # Debugging output if needed
1450        if l.level == logging.DEBUG:
1451            # Only in DEBUG mode do we process and output all those shit
1452            self._post_handle_job_debug(job, successors)
1453
1454        # SimInspect breakpoints support
1455        job.state._inspect('cfg_handle_job', BP_AFTER)
1456
1457    def _post_process_successors(self, input_state, sim_successors, successors):
1458        """
1459        Filter the list of successors
1460
1461        :param SimState      input_state:            Input state.
1462        :param SimSuccessors sim_successors:         The SimSuccessors instance.
1463        :param list successors:                      A list of successors generated after processing the current block.
1464        :return:                                     A list of successors.
1465        :rtype:                                      list
1466        """
1467
1468        if sim_successors.sort == 'IRSB' and input_state.thumb:
1469            successors = self._arm_thumb_filter_jump_successors(sim_successors.artifacts['irsb'],
1470                                                                successors,
1471                                                                lambda state: state.scratch.ins_addr,
1472                                                                lambda state: state.scratch.exit_stmt_idx,
1473                                                                lambda state: state.history.jumpkind,
1474                                                                )
1475
1476        # If there is a call exit, we shouldn't put the default exit (which
1477        # is artificial) into the CFG. The exits will be Ijk_Call and
1478        # Ijk_FakeRet, and Ijk_Call always goes first
1479        extra_info = {'is_call_jump': False,
1480                      'call_target': None,
1481                      'return_target': None,
1482                      'last_call_exit_target': None,
1483                      'skip_fakeret': False,
1484                      }
1485
1486        # Post-process jumpkind before touching all_successors
1487        for suc in sim_successors.all_successors:  # we process all successors here to include potential unsat successors
1488            suc_jumpkind = suc.history.jumpkind
1489            if self._is_call_jumpkind(suc_jumpkind):
1490                extra_info['is_call_jump'] = True
1491                break
1492
1493        return successors, extra_info
1494
1495    def _post_handle_job_debug(self, job, successors):
1496        """
1497        Post job handling: print debugging information regarding the current job.
1498
1499        :param CFGJob job:      The current CFGJob instance.
1500        :param list successors: All successors of the analysis job.
1501        :return: None
1502        """
1503
1504        sim_successors = job.sim_successors
1505        call_stack_suffix = job.call_stack_suffix
1506        extra_info = job.extra_info
1507        successor_status = job.successor_status
1508
1509        func = self.project.loader.find_symbol(job.func_addr)
1510        obj = self.project.loader.find_object_containing(job.addr)
1511        function_name = func.name if func is not None else None
1512        module_name = obj.provides if obj is not None else None
1513
1514        node = self.model.get_node(job.block_id)
1515        depth_str = "(D:%s)" % node.depth if node.depth is not None else ""
1516
1517        l.debug("%s [%#x%s | %s]", sim_successors.description, sim_successors.addr, depth_str,
1518                "->".join([hex(i) for i in call_stack_suffix if i is not None])
1519                )
1520        l.debug("(Function %s of binary %s)", function_name, module_name)
1521        l.debug("|    Call jump: %s", extra_info['is_call_jump'] if extra_info is not None else 'unknown')
1522
1523        for suc in successors:
1524            jumpkind = suc.history.jumpkind
1525            if jumpkind == "Ijk_FakeRet":
1526                exit_type_str = "Simulated Ret"
1527            else:
1528                exit_type_str = "-"
1529            try:
1530                l.debug("|    target: %#x %s [%s] %s", suc.solver.eval_one(suc.ip), successor_status[suc],
1531                        exit_type_str, jumpkind)
1532            except (SimValueError, SimSolverModeError):
1533                l.debug("|    target cannot be concretized. %s [%s] %s", successor_status[suc], exit_type_str,
1534                        jumpkind)
1535        l.debug("%d exits remaining, %d exits pending.", len(self._job_info_queue), len(self._pending_jobs))
1536        l.debug("%d unique basic blocks are analyzed so far.", len(self._analyzed_addrs))
1537
1538    def _iteratively_clean_pending_exits(self):
1539        """
1540        Iteratively update the completed functions set, analyze whether each function returns or not, and remove
1541        pending exits if the callee function does not return. We do this iteratively so that we come to a fixed point
1542        in the end. In most cases, the number of outer iteration equals to the maximum levels of wrapped functions
1543        whose "returningness" is determined by the very last function it calls.
1544
1545        :return: None
1546        """
1547
1548        while True:
1549            # did we finish analyzing any function?
1550            # fill in self._completed_functions
1551            self._make_completed_functions()
1552
1553            if self._pending_jobs:
1554                # There are no more remaining jobs, but only pending jobs left. Each pending job corresponds to
1555                # a previous job that does not return properly.
1556                # Now it's a good time to analyze each function (that we have so far) and determine if it is a)
1557                # returning, b) not returning, or c) unknown. For those functions that are definitely not returning,
1558                # remove the corresponding pending exits from `pending_jobs` array. Perform this procedure iteratively
1559                # until no new not-returning functions appear. Then we pick a pending exit based on the following
1560                # priorities:
1561                # - Job pended by a returning function
1562                # - Job pended by an unknown function
1563
1564                new_changes = self._iteratively_analyze_function_features()
1565                functions_do_not_return = new_changes['functions_do_not_return']
1566
1567                self._update_function_callsites(functions_do_not_return)
1568
1569                if not self._clean_pending_exits():
1570                    # no more pending exits are removed. we are good to go!
1571                    break
1572            else:
1573                break
1574
1575    def _clean_pending_exits(self):
1576        """
1577        Remove those pending exits if:
1578        a) they are the return exits of non-returning SimProcedures
1579        b) they are the return exits of non-returning syscalls
1580        c) they are the return exits of non-returning functions
1581
1582        :return: True if any pending exits are removed, False otherwise
1583        :rtype: bool
1584        """
1585
1586        pending_exits_to_remove = [ ]
1587
1588        for block_id, jobs in self._pending_jobs.items():
1589            for pe in jobs:
1590                if pe.returning_source is None:
1591                    # The original call failed. This pending exit must be followed.
1592                    continue
1593
1594                func = self.kb.functions.function(pe.returning_source)
1595                if func is None:
1596                    # Why does it happen?
1597                    l.warning("An expected function at %s is not found. Please report it to Fish.",
1598                              hex(pe.returning_source) if pe.returning_source is not None else 'None')
1599                    continue
1600
1601                if func.returning is False:
1602                    # Oops, it's not returning
1603                    # Remove this pending exit
1604                    pending_exits_to_remove.append(block_id)
1605
1606                    # We want to mark that call as not returning in the current function
1607                    current_function_addr = self._block_id_current_func_addr(block_id)
1608                    if current_function_addr is not None:
1609                        current_function = self.kb.functions.function(current_function_addr)
1610                        if current_function is not None:
1611                            call_site_addr = self._block_id_addr(pe.src_block_id)
1612                            current_function._call_sites[call_site_addr] = (func.addr, None)
1613                        else:
1614                            l.warning('An expected function at %#x is not found. Please report it to Fish.',
1615                                      current_function_addr
1616                                      )
1617
1618        for block_id in pending_exits_to_remove:
1619            l.debug('Removing all pending exits to %#x since the target function %#x does not return',
1620                    self._block_id_addr(block_id),
1621                    next(iter(self._pending_jobs[block_id])).returning_source,
1622                    )
1623
1624            for to_remove in self._pending_jobs[block_id]:
1625                self._deregister_analysis_job(to_remove.caller_func_addr, to_remove)
1626
1627            del self._pending_jobs[block_id]
1628
1629        if pending_exits_to_remove:
1630            return True
1631        return False
1632
1633    # Successor handling
1634
1635    def _pre_handle_successor_state(self, extra_info, jumpkind, target_addr):
1636        """
1637
1638        :return: None
1639        """
1640
1641        # Fill up extra_info
1642        if extra_info['is_call_jump'] and self._is_call_jumpkind(jumpkind):
1643            extra_info['call_target'] = target_addr
1644
1645        if (extra_info['is_call_jump'] and
1646                    jumpkind == 'Ijk_FakeRet'):
1647            extra_info['return_target'] = target_addr
1648
1649        if jumpkind == "Ijk_Call":
1650            extra_info['last_call_exit_target'] = target_addr
1651
1652    def _handle_successor(self, job, successor, successors):
1653        """
1654        Returns a new CFGJob instance for further analysis, or None if there is no immediate state to perform the
1655        analysis on.
1656
1657        :param CFGJob job: The current job.
1658        """
1659
1660        state = successor
1661        all_successor_states = successors
1662        addr = job.addr
1663
1664        # The PathWrapper instance to return
1665        pw = None
1666
1667        job.successor_status[state] = ""
1668
1669        new_state = state.copy()
1670        suc_jumpkind = state.history.jumpkind
1671        suc_exit_stmt_idx = state.scratch.exit_stmt_idx
1672        suc_exit_ins_addr = state.scratch.exit_ins_addr
1673
1674        if suc_jumpkind in {'Ijk_EmWarn', 'Ijk_NoDecode', 'Ijk_MapFail', 'Ijk_NoRedir',
1675                            'Ijk_SigTRAP', 'Ijk_SigSEGV', 'Ijk_ClientReq'}:
1676            # Ignore SimExits that are of these jumpkinds
1677            job.successor_status[state] = "Skipped"
1678            return [ ]
1679
1680        call_target = job.extra_info['call_target']
1681        if suc_jumpkind == "Ijk_FakeRet" and call_target is not None:
1682            # if the call points to a SimProcedure that doesn't return, we don't follow the fakeret anymore
1683            if self.project.is_hooked(call_target):
1684                sim_proc = self.project._sim_procedures[call_target]
1685                if sim_proc.NO_RET:
1686                    return [ ]
1687
1688        # Get target address
1689        try:
1690            target_addr = state.solver.eval_one(state.ip)
1691        except (SimValueError, SimSolverModeError):
1692            # It cannot be concretized currently. Maybe we can handle it later, maybe it just cannot be concretized
1693            target_addr = None
1694            if suc_jumpkind == "Ijk_Ret":
1695                target_addr = job.call_stack.current_return_target
1696                if target_addr is not None:
1697                    new_state.ip = new_state.solver.BVV(target_addr, new_state.arch.bits)
1698
1699        if target_addr is None:
1700            # Unlucky...
1701            return [ ]
1702
1703        if state.thumb:
1704            # Make sure addresses are always odd. It is important to encode this information in the address for the
1705            # time being.
1706            target_addr |= 1
1707
1708        # see if the target successor is in our whitelist
1709        if self._address_whitelist is not None:
1710            if target_addr not in self._address_whitelist:
1711                l.debug("Successor %#x is not in the address whitelist. Skip.", target_addr)
1712                return [ ]
1713
1714        # see if this edge is in the base graph
1715        if self._base_graph is not None:
1716            # TODO: make it more efficient. the current implementation is half-assed and extremely slow
1717            for src_, dst_ in self._base_graph.edges():
1718                if src_.addr == addr and dst_.addr == target_addr:
1719                    break
1720            else:
1721                # not found
1722                l.debug("Edge (%#x -> %#x) is not found in the base graph. Skip.", addr, target_addr)
1723                return [ ]
1724
1725        # Fix target_addr for syscalls
1726        if suc_jumpkind.startswith("Ijk_Sys"):
1727            syscall_proc = self.project.simos.syscall(new_state)
1728            if syscall_proc is not None:
1729                target_addr = syscall_proc.addr
1730
1731        self._pre_handle_successor_state(job.extra_info, suc_jumpkind, target_addr)
1732
1733        if suc_jumpkind == "Ijk_FakeRet":
1734            if target_addr == job.extra_info['last_call_exit_target']:
1735                l.debug("... skipping a fake return exit that has the same target with its call exit.")
1736                job.successor_status[state] = "Skipped"
1737                return [ ]
1738
1739            if job.extra_info['skip_fakeret']:
1740                l.debug('... skipping a fake return exit since the function it\'s calling doesn\'t return')
1741                job.successor_status[state] = "Skipped - non-returning function 0x%x" % job.extra_info['call_target']
1742                return [ ]
1743
1744        # TODO: Make it optional
1745        if (suc_jumpkind == 'Ijk_Ret' and
1746                    self._call_depth is not None and
1747                    len(job.call_stack) <= 1
1748            ):
1749            # We cannot continue anymore since this is the end of the function where we started tracing
1750            l.debug('... reaching the end of the starting function, skip.')
1751            job.successor_status[state] = "Skipped - reaching the end of the starting function"
1752            return [ ]
1753
1754        # Create the new call stack of target block
1755        new_call_stack = self._create_new_call_stack(addr, all_successor_states, job, target_addr,
1756                                                     suc_jumpkind)
1757        # Create the callstack suffix
1758        new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level)
1759        # Tuple that will be used to index this exit
1760        new_tpl = self._generate_block_id(new_call_stack_suffix, target_addr, suc_jumpkind.startswith('Ijk_Sys'))
1761
1762        # We might have changed the mode for this basic block
1763        # before. Make sure it is still running in 'fastpath' mode
1764        self._reset_state_mode(new_state, 'fastpath')
1765
1766        pw = CFGJob(target_addr,
1767                    new_state,
1768                    self._context_sensitivity_level,
1769                    src_block_id=job.block_id,
1770                    src_exit_stmt_idx=suc_exit_stmt_idx,
1771                    src_ins_addr=suc_exit_ins_addr,
1772                    call_stack=new_call_stack,
1773                    jumpkind=suc_jumpkind,
1774                    )
1775        # Special case: If the binary has symbols and the target address is a function, but for some reason (e.g.,
1776        # a tail-call optimization) the CallStack's function address is still the old function address, we will have to
1777        # overwrite it here.
1778        if not self._is_call_jumpkind(pw.jumpkind):
1779            target_symbol = self.project.loader.find_symbol(target_addr)
1780            if target_symbol and target_symbol.is_function:
1781                # Force update the function address
1782                pw.func_addr = target_addr
1783
1784        # Generate new exits
1785        if suc_jumpkind == "Ijk_Ret":
1786            # This is the real return exit
1787            job.successor_status[state] = "Appended"
1788
1789        elif suc_jumpkind == "Ijk_FakeRet":
1790            # This is the default "fake" retn that generated at each
1791            # call. Save them first, but don't process them right
1792            # away
1793            # st = self.project._simos.prepare_call_state(new_state, initial_state=saved_state)
1794            st = new_state
1795            self._reset_state_mode(st, 'fastpath')
1796
1797            pw = None # clear the job
1798            pe = PendingJob(job.func_addr,
1799                            job.extra_info['call_target'],
1800                            st,
1801                            job.block_id,
1802                            suc_exit_stmt_idx,
1803                            suc_exit_ins_addr,
1804                            new_call_stack
1805                            )
1806            self._pending_jobs[new_tpl].append(pe)
1807            self._register_analysis_job(pe.caller_func_addr, pe)
1808            job.successor_status[state] = "Pended"
1809
1810        elif self._traced_addrs[new_call_stack_suffix][target_addr] >= 1 and suc_jumpkind == "Ijk_Ret":
1811            # This is a corner case for the f****** ARM instruction
1812            # like
1813            # BLEQ <address>
1814            # If we have analyzed the boring exit before returning from that called address, we will lose the link
1815            # between the last block of the function being called and the basic block it returns to. We cannot
1816            # reanalyze the basic block as we are not flow-sensitive, but we can still record the connection and make
1817            # for it afterwards.
1818            pass
1819
1820        else:
1821            job.successor_status[state] = "Appended"
1822
1823        if job.extra_info['is_call_jump'] and job.extra_info['call_target'] in self._non_returning_functions:
1824            job.extra_info['skip_fakeret'] = True
1825
1826        if not pw:
1827            return [ ]
1828
1829        if self._base_graph is not None:
1830            # remove all existing jobs that has the same block ID
1831            if next((en for en in self.jobs if en.block_id == pw.block_id), None):
1832                # TODO: this is very hackish. Reimplement this logic later
1833                self._job_info_queue = [entry for entry in self._job_info_queue if entry.job.block_id != pw.block_id]
1834
1835        # register the job
1836        self._register_analysis_job(pw.func_addr, pw)
1837
1838        return [ pw ]
1839
1840    def _handle_job_without_successors(self, job, irsb, insn_addrs):
1841        """
1842        A block without successors should still be handled so it can be added to the function graph correctly.
1843
1844        :param CFGJob job:  The current job that do not have any successor.
1845        :param IRSB irsb:   The related IRSB.
1846        :param insn_addrs:  A list of instruction addresses of this IRSB.
1847        :return: None
1848        """
1849
1850        # it's not an empty block
1851
1852        # handle all conditional exits
1853        ins_addr = job.addr
1854        for stmt_idx, stmt in enumerate(irsb.statements):
1855            if type(stmt) is pyvex.IRStmt.IMark:
1856                ins_addr = stmt.addr + stmt.delta
1857            elif type(stmt) is pyvex.IRStmt.Exit:
1858                successor_jumpkind = stmt.jk
1859                self._update_function_transition_graph(
1860                    job.block_id, None,
1861                    jumpkind = successor_jumpkind,
1862                    ins_addr=ins_addr,
1863                    stmt_idx=stmt_idx,
1864                )
1865
1866        # handle the default exit
1867        successor_jumpkind = irsb.jumpkind
1868        successor_last_ins_addr = insn_addrs[-1]
1869        self._update_function_transition_graph(job.block_id, None,
1870                                               jumpkind=successor_jumpkind,
1871                                               ins_addr=successor_last_ins_addr,
1872                                               stmt_idx=DEFAULT_STATEMENT,
1873                                               )
1874
1875    # SimAction handling
1876
1877    def _handle_actions(self, state, current_run, func, sp_addr, accessed_registers):
1878        """
1879        For a given state and current location of of execution, will update a function by adding the offets of
1880        appropriate actions to the stack variable or argument registers for the fnc.
1881
1882        :param SimState state: upcoming state.
1883        :param SimSuccessors current_run: possible result states.
1884        :param knowledge.Function func: current function.
1885        :param int sp_addr: stack pointer address.
1886        :param set accessed_registers: set of before accessed registers.
1887        """
1888        se = state.solver
1889
1890        if func is not None and sp_addr is not None:
1891
1892            # Fix the stack pointer (for example, skip the return address on the stack)
1893            new_sp_addr = sp_addr + self.project.arch.call_sp_fix
1894
1895            actions = [a for a in state.history.recent_actions if a.bbl_addr == current_run.addr]
1896
1897            for a in actions:
1898                if a.type == "mem" and a.action == "read":
1899                    try:
1900                        addr = se.eval_one(a.addr.ast, default=0)
1901                    except (claripy.ClaripyError, SimSolverModeError):
1902                        continue
1903                    if (self.project.arch.call_pushes_ret and addr >= new_sp_addr) or \
1904                            (not self.project.arch.call_pushes_ret and addr >= new_sp_addr):
1905                        # TODO: What if a variable locates higher than the stack is modified as well? We probably want
1906                        # TODO: to make sure the accessing address falls in the range of stack
1907                        offset = addr - new_sp_addr
1908                        func._add_argument_stack_variable(offset)
1909                elif a.type == "reg":
1910                    offset = a.offset
1911                    if a.action == "read" and offset not in accessed_registers:
1912                        func._add_argument_register(offset)
1913                    elif a.action == "write":
1914                        accessed_registers.add(offset)
1915        else:
1916            l.error(
1917                "handle_actions: Function not found, or stack pointer is None. It might indicates unbalanced stack.")
1918
1919    # Private utils - DiGraph construction and manipulation
1920
1921    def _graph_get_node(self, node_key, terminator_for_nonexistent_node=False):
1922        """
1923
1924        :param node_key:
1925        :return:
1926        """
1927
1928        if node_key not in self._nodes:
1929            if not terminator_for_nonexistent_node:
1930                return None
1931            # Generate a PathTerminator node
1932            addr = self._block_id_addr(node_key)
1933            func_addr = self._block_id_current_func_addr(node_key)
1934            if func_addr is None:
1935                # We'll have to use the current block address instead
1936                # TODO: Is it really OK?
1937                func_addr = self._block_id_addr(node_key)
1938
1939            is_thumb = isinstance(self.project.arch, ArchARM) and addr % 2 == 1
1940
1941            pt = CFGENode(self._block_id_addr(node_key),
1942                          None,
1943                          self.model,
1944                          input_state=None,
1945                          simprocedure_name="PathTerminator",
1946                          function_address=func_addr,
1947                          callstack_key=self._block_id_callstack_key(node_key),
1948                          thumb=is_thumb
1949                          )
1950            if self._keep_state:
1951                # We don't have an input state available for it (otherwise we won't have to create a
1952                # PathTerminator). This is just a trick to make get_any_irsb() happy.
1953                pt.input_state = self.project.factory.entry_state()
1954                pt.input_state.ip = pt.addr
1955            self._nodes[node_key] = pt
1956            self._nodes_by_addr[pt.addr].append(pt)
1957
1958            if is_thumb:
1959                self._thumb_addrs.add(addr)
1960                self._thumb_addrs.add(addr - 1)
1961
1962            l.debug("Block ID %s does not exist. Create a PathTerminator instead.",
1963                    self._block_id_repr(node_key))
1964
1965        return self._nodes[node_key]
1966
1967    def _graph_add_edge(self, src_node_key, dst_node_key, **kwargs):
1968        """
1969
1970        :param src_node_key:
1971        :param dst_node_key:
1972        :param jumpkind:
1973        :param exit_stmt_idx:
1974        :return:
1975        """
1976
1977        dst_node = self._graph_get_node(dst_node_key, terminator_for_nonexistent_node=True)
1978
1979        if src_node_key is None:
1980            self.graph.add_node(dst_node)
1981
1982        else:
1983            src_node = self._graph_get_node(src_node_key, terminator_for_nonexistent_node=True)
1984            self.graph.add_edge(src_node, dst_node, **kwargs)
1985
1986    def _update_function_transition_graph(self, src_node_key, dst_node_key, jumpkind='Ijk_Boring', ins_addr=None,
1987                                          stmt_idx=None, confirmed=None):
1988        """
1989        Update transition graphs of functions in function manager based on information passed in.
1990
1991        :param src_node_key:    Node key of the source CFGNode. Might be None.
1992        :param dst_node:        Node key of the destination CFGNode. Might be None.
1993        :param str jumpkind:    Jump kind of this transition.
1994        :param int ret_addr:    The theoretical return address for calls.
1995        :param int or None ins_addr:    Address of the instruction where this transition is made.
1996        :param int or None stmt_idx:    ID of the statement where this transition is made.
1997        :param bool or None confirmed:  Whether this call transition has been confirmed or not.
1998        :return: None
1999        """
2000
2001        if dst_node_key is not None:
2002            dst_node = self._graph_get_node(dst_node_key, terminator_for_nonexistent_node=True)
2003            dst_node_addr = dst_node.addr
2004            dst_codenode = dst_node.to_codenode()
2005            dst_node_func_addr = dst_node.function_address
2006        else:
2007            dst_node = None
2008            dst_node_addr = None
2009            dst_codenode = None
2010            dst_node_func_addr = None
2011
2012        if src_node_key is None:
2013            if dst_node is None:
2014                raise ValueError("Either src_node_key or dst_node_key must be specified.")
2015            self.kb.functions.function(dst_node.function_address, create=True)._register_nodes(True,
2016                                                                                               dst_codenode
2017                                                                                               )
2018            return
2019
2020        src_node = self._graph_get_node(src_node_key, terminator_for_nonexistent_node=True)
2021
2022        # Update the transition graph of current function
2023        if jumpkind == "Ijk_Call":
2024            ret_addr = src_node.return_target
2025            ret_node = self.kb.functions.function(
2026                src_node.function_address,
2027                create=True
2028            )._get_block(ret_addr).codenode if ret_addr else None
2029
2030            self.kb.functions._add_call_to(
2031                function_addr=src_node.function_address,
2032                from_node=src_node.to_codenode(),
2033                to_addr=dst_node_addr,
2034                retn_node=ret_node,
2035                syscall=False,
2036                ins_addr=ins_addr,
2037                stmt_idx=stmt_idx,
2038            )
2039
2040        if jumpkind.startswith('Ijk_Sys'):
2041
2042            self.kb.functions._add_call_to(
2043                function_addr=src_node.function_address,
2044                from_node=src_node.to_codenode(),
2045                to_addr=dst_node_addr,
2046                retn_node=src_node.to_codenode(),  # For syscalls, they are returning to the address of themselves
2047                syscall=True,
2048                ins_addr=ins_addr,
2049                stmt_idx=stmt_idx,
2050            )
2051
2052        elif jumpkind == 'Ijk_Ret':
2053            # Create a return site for current function
2054            self.kb.functions._add_return_from(
2055                function_addr=src_node.function_address,
2056                from_node=src_node.to_codenode(),
2057                to_node=dst_codenode,
2058            )
2059
2060            if dst_node is not None:
2061                # Create a returning edge in the caller function
2062                self.kb.functions._add_return_from_call(
2063                    function_addr=dst_node_func_addr,
2064                    src_function_addr=src_node.function_address,
2065                    to_node=dst_codenode,
2066                )
2067
2068        elif jumpkind == 'Ijk_FakeRet':
2069            self.kb.functions._add_fakeret_to(
2070                function_addr=src_node.function_address,
2071                from_node=src_node.to_codenode(),
2072                to_node=dst_codenode,
2073                confirmed=confirmed,
2074            )
2075
2076        elif jumpkind in ('Ijk_Boring', 'Ijk_InvalICache'):
2077
2078            src_obj = self.project.loader.find_object_containing(src_node.addr)
2079            dest_obj = self.project.loader.find_object_containing(dst_node.addr) if dst_node is not None else None
2080
2081            if src_obj is dest_obj:
2082                # Jump/branch within the same object. Might be an outside jump.
2083                to_outside = src_node.function_address != dst_node_func_addr
2084            else:
2085                # Jump/branch between different objects. Must be an outside jump.
2086                to_outside = True
2087
2088            if not to_outside:
2089                self.kb.functions._add_transition_to(
2090                    function_addr=src_node.function_address,
2091                    from_node=src_node.to_codenode(),
2092                    to_node=dst_codenode,
2093                    ins_addr=ins_addr,
2094                    stmt_idx=stmt_idx,
2095                )
2096
2097            else:
2098                self.kb.functions._add_outside_transition_to(
2099                    function_addr=src_node.function_address,
2100                    from_node=src_node.to_codenode(),
2101                    to_node=dst_codenode,
2102                    to_function_addr=dst_node_func_addr,
2103                    ins_addr=ins_addr,
2104                    stmt_idx=stmt_idx,
2105                )
2106
2107    def _update_function_callsites(self, noreturns):
2108        """
2109        Update the callsites of functions (remove return targets) that are calling functions that are just deemed not
2110        returning.
2111
2112        :param iterable func_addrs: A collection of functions for newly-recovered non-returning functions.
2113        :return:                    None
2114        """
2115
2116        for callee_func in noreturns:
2117            # consult the callgraph to find callers of each function
2118            if callee_func.addr not in self.functions.callgraph:
2119                continue
2120            caller_addrs = self.functions.callgraph.predecessors(callee_func.addr)
2121            for caller_addr in caller_addrs:
2122                caller = self.functions[caller_addr]
2123                if callee_func not in caller.transition_graph:
2124                    continue
2125                callsites = caller.transition_graph.predecessors(callee_func)
2126                for callsite in callsites:
2127                    caller._add_call_site(callsite.addr, callee_func.addr, None)
2128
2129    def _add_additional_edges(self, input_state, sim_successors, cfg_node, successors):
2130        """
2131
2132        :return:
2133        """
2134
2135        # If we have additional edges for this block, add them in
2136        addr = cfg_node.addr
2137
2138        if addr in self._additional_edges:
2139            dests = self._additional_edges[addr]
2140            for dst in dests:
2141                if sim_successors.sort == 'IRSB':
2142                    base_state = sim_successors.all_successors[0].copy()
2143                else:
2144                    if successors:
2145                        # We try to use the first successor.
2146                        base_state = successors[0].copy()
2147                    else:
2148                        # The SimProcedure doesn't have any successor (e.g. it's a PathTerminator)
2149                        # We'll use its input state instead
2150                        base_state = input_state
2151                base_state.ip = dst
2152                # TODO: Allow for sp adjustments
2153                successors.append(base_state)
2154                l.debug("Additional jump target %#x for block %s is appended.", dst, sim_successors.description)
2155
2156        return successors
2157
2158    def _filter_insane_successors(self, successors):
2159        """
2160        Throw away all successors whose target doesn't make sense
2161
2162        This method is called after we resolve an indirect jump using an unreliable method (like, not through one of
2163        the indirect jump resolvers, but through either pure concrete execution or backward slicing) to filter out the
2164        obviously incorrect successors.
2165
2166        :param list successors: A collection of successors.
2167        :return:                A filtered list of successors
2168        :rtype:                 list
2169        """
2170
2171        old_successors = successors[::]
2172        successors = [ ]
2173        for i, suc in enumerate(old_successors):
2174            if suc.solver.symbolic(suc.ip):
2175                # It's symbolic. Take it, and hopefully we can resolve it later
2176                successors.append(suc)
2177
2178            else:
2179                ip_int = suc.solver.eval_one(suc.ip)
2180
2181                if self._is_address_executable(ip_int) or \
2182                        self.project.is_hooked(ip_int) or \
2183                        self.project.simos.is_syscall_addr(ip_int):
2184                    successors.append(suc)
2185                else:
2186                    l.debug('An obviously incorrect successor %d/%d (%#x) is ditched',
2187                            i + 1,
2188                            len(old_successors),
2189                            ip_int
2190                            )
2191
2192        return successors
2193
2194    def _remove_non_return_edges(self):
2195        """
2196        Remove those return_from_call edges that actually do not return due to
2197        calling some not-returning functions.
2198        :return: None
2199        """
2200        for func in self.kb.functions.values():
2201            graph = func.transition_graph
2202            all_return_edges = [(u, v) for (u, v, data) in graph.edges(data=True) if data['type'] == 'return_from_call']
2203            for return_from_call_edge in all_return_edges:
2204                callsite_block_addr, return_to_addr = return_from_call_edge
2205                call_func_addr = func.get_call_target(callsite_block_addr)
2206                if call_func_addr is None:
2207                    continue
2208
2209                call_func = self.kb.functions.function(call_func_addr)
2210                if call_func is None:
2211                    # Weird...
2212                    continue
2213
2214                if call_func.returning is False:
2215                    # Remove that edge!
2216                    graph.remove_edge(call_func_addr, return_to_addr)
2217                    # Remove the edge in CFG
2218                    nodes = self.get_all_nodes(callsite_block_addr)
2219                    for n in nodes:
2220                        successors = self.get_successors_and_jumpkind(n, excluding_fakeret=False)
2221                        for successor, jumpkind in successors:
2222                            if jumpkind == 'Ijk_FakeRet' and successor.addr == return_to_addr:
2223                                self.remove_edge(n, successor)
2224
2225                                # Remove all dangling nodes
2226                                # wcc = list(networkx.weakly_connected_components(graph))
2227                                # for nodes in wcc:
2228                                #    if func.startpoint not in nodes:
2229                                #        graph.remove_nodes_from(nodes)
2230
2231    # Private methods - resolving indirect jumps
2232
2233    @staticmethod
2234    def _convert_indirect_jump_targets_to_states(job, indirect_jump_targets):
2235        """
2236        Convert each concrete indirect jump target into a SimState. If there are non-zero successors and the original
2237        jumpkind is a call, we also generate a fake-ret successor.
2238
2239        :param job:                     The CFGJob instance.
2240        :param indirect_jump_targets:   A collection of concrete jump targets resolved from a indirect jump.
2241        :return:                        A list of SimStates.
2242        :rtype:                         list
2243        """
2244
2245        first_successor = job.sim_successors.all_successors[0]
2246        successors = [ ]
2247        for t in indirect_jump_targets:
2248            # Insert new successors
2249            a = first_successor.copy()
2250            a.ip = t
2251            successors.append(a)
2252        # special case: if the indirect jump is in fact an indirect call, we should create a FakeRet successor
2253        if successors and first_successor.history.jumpkind == 'Ijk_Call':
2254            a = first_successor.copy()
2255            a.ip = job.cfg_node.addr + job.cfg_node.size
2256            a.history.jumpkind = 'Ijk_FakeRet'
2257            successors.append(a)
2258        return successors
2259
2260    def _try_resolving_indirect_jumps(self, sim_successors, cfg_node, func_addr, successors, exception_info, artifacts):
2261        """
2262        Resolve indirect jumps specified by sim_successors.addr.
2263
2264        :param SimSuccessors sim_successors: The SimSuccessors instance.
2265        :param CFGNode cfg_node:                     The CFGNode instance.
2266        :param int func_addr:                        Current function address.
2267        :param list successors:                      A list of successors.
2268        :param tuple exception_info:                 The sys.exc_info() of the exception or None if none occurred.
2269        :param artifacts:                            A container of collected information.
2270        :return:                                     Resolved successors
2271        :rtype:                                      list
2272        """
2273
2274        # Try to resolve indirect jumps with advanced backward slicing (if enabled)
2275        if sim_successors.sort == 'IRSB' and \
2276                self._is_indirect_jump(cfg_node, sim_successors):
2277            l.debug('IRSB %#x has an indirect jump as its default exit', cfg_node.addr)
2278
2279            # We need input states to perform backward slicing
2280            if self._advanced_backward_slicing and self._keep_state:
2281
2282                # Optimization: make sure we only try to resolve an indirect jump if any of the following criteria holds
2283                # - It's a jump (Ijk_Boring), and its target is either fully symbolic, or its resolved target is within
2284                #   the current binary
2285                # - It's a call (Ijk_Call), and its target is fully symbolic
2286                # TODO: This is very hackish, please refactor this part of code later
2287                should_resolve = True
2288                legit_successors = [suc for suc in successors if suc.history.jumpkind in ('Ijk_Boring', 'Ijk_InvalICache', 'Ijk_Call')]
2289                if legit_successors:
2290                    legit_successor = legit_successors[0]
2291                    if legit_successor.ip.symbolic:
2292                        if not legit_successor.history.jumpkind == 'Ijk_Call':
2293                            should_resolve = False
2294                    else:
2295                        if legit_successor.history.jumpkind == 'Ijk_Call':
2296                            should_resolve = False
2297                        else:
2298                            concrete_target = legit_successor.solver.eval(legit_successor.ip)
2299                            if not self.project.loader.find_object_containing(
2300                                    concrete_target) is self.project.loader.main_object:
2301                                should_resolve = False
2302
2303                else:
2304                    # No interesting successors... skip
2305                    should_resolve = False
2306
2307                # TODO: Handle those successors
2308                if not should_resolve:
2309                    l.debug("This might not be an indirect jump that has multiple targets. Skipped.")
2310                    self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2311
2312                else:
2313                    more_successors = self._backward_slice_indirect(cfg_node, sim_successors, func_addr)
2314
2315                    if more_successors:
2316                        # Remove the symbolic successor
2317                        # TODO: Now we are removing all symbolic successors. Is it possible
2318                        # TODO: that there is more than one symbolic successor?
2319                        all_successors = [suc for suc in successors if not suc.solver.symbolic(suc.ip)]
2320                        # Insert new successors
2321                        # We insert new successors in the beginning of all_successors list so that we don't break the
2322                        # assumption that Ijk_FakeRet is always the last element in the list
2323                        for suc_addr in more_successors:
2324                            a = sim_successors.all_successors[0].copy()
2325                            a.ip = suc_addr
2326                            all_successors.insert(0, a)
2327
2328                        l.debug('The indirect jump is successfully resolved.')
2329                        self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, more_successors)
2330
2331                    else:
2332                        l.debug('Failed to resolve the indirect jump.')
2333                        self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2334
2335            else:
2336                if not successors:
2337                    l.debug('Cannot resolve the indirect jump without advanced backward slicing enabled: %s',
2338                            cfg_node)
2339
2340        # Try to find more successors if we failed to resolve the indirect jump before
2341        if exception_info is None and (cfg_node.is_simprocedure or self._is_indirect_jump(cfg_node, sim_successors)):
2342            has_call_jumps = any(suc_state.history.jumpkind == 'Ijk_Call' for suc_state in successors)
2343            if has_call_jumps:
2344                concrete_successors = [suc_state for suc_state in successors if
2345                                       suc_state.history.jumpkind != 'Ijk_FakeRet' and not suc_state.solver.symbolic(
2346                                           suc_state.ip)]
2347            else:
2348                concrete_successors = [suc_state for suc_state in successors if
2349                                       not suc_state.solver.symbolic(suc_state.ip)]
2350            symbolic_successors = [suc_state for suc_state in successors if suc_state.solver.symbolic(suc_state.ip)]
2351
2352            resolved = True if not symbolic_successors else False
2353            if symbolic_successors:
2354                for suc in symbolic_successors:
2355                    if o.SYMBOLIC in suc.options:
2356                        targets = suc.solver.eval_upto(suc.ip, 32)
2357                        if len(targets) < 32:
2358                            all_successors = []
2359                            resolved = True
2360                            for t in targets:
2361                                new_ex = suc.copy()
2362                                new_ex.ip = suc.solver.BVV(t, suc.ip.size())
2363                                all_successors.append(new_ex)
2364                        else:
2365                            break
2366
2367            if not resolved and (
2368                        (symbolic_successors and not concrete_successors) or
2369                        (not cfg_node.is_simprocedure and self._is_indirect_jump(cfg_node, sim_successors))
2370            ):
2371                l.debug("%s has an indirect jump. See what we can do about it.", cfg_node)
2372
2373                if sim_successors.sort == 'SimProcedure' and \
2374                        sim_successors.artifacts['adds_exits']:
2375                    # Skip those SimProcedures that don't create new SimExits
2376                    l.debug('We got a SimProcedure %s in fastpath mode that creates new exits.', sim_successors.description)
2377                    if self._enable_symbolic_back_traversal:
2378                        successors = self._symbolically_back_traverse(sim_successors, artifacts, cfg_node)
2379                        # mark jump as resolved if we got successors
2380                        if successors:
2381                            succ_addrs = [s.addr for s in successors]
2382                            self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, succ_addrs)
2383                        else:
2384                            self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2385                        l.debug("Got %d concrete exits in symbolic mode.", len(successors))
2386                    else:
2387                        self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2388                        # keep fake_rets
2389                        successors = [s for s in successors if s.history.jumpkind == "Ijk_FakeRet"]
2390
2391                elif sim_successors.sort == 'IRSB'and \
2392                        any([ex.history.jumpkind != 'Ijk_Ret' for ex in successors]):
2393                    # We cannot properly handle Return as that requires us start execution from the caller...
2394                    l.debug("Try traversal backwards in symbolic mode on %s.", cfg_node)
2395                    if self._enable_symbolic_back_traversal:
2396                        successors = self._symbolically_back_traverse(sim_successors, artifacts, cfg_node)
2397
2398                        # Remove successors whose IP doesn't make sense
2399                        successors = [suc for suc in successors
2400                                          if self._is_address_executable(suc.solver.eval_one(suc.ip))]
2401
2402                        # mark jump as resolved if we got successors
2403                        if successors:
2404                            succ_addrs = [s.addr for s in successors]
2405                            self.kb.indirect_jumps.update_resolved_addrs(cfg_node.addr, succ_addrs)
2406                        else:
2407                            self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2408                        l.debug('Got %d concrete exits in symbolic mode', len(successors))
2409                    else:
2410                        self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2411                        successors = []
2412
2413                elif successors and all([ex.history.jumpkind == 'Ijk_Ret' for ex in successors]):
2414                    l.debug('All exits are returns (Ijk_Ret). It will be handled by pending exits.')
2415
2416                else:
2417                    l.debug('Cannot resolve this indirect jump: %s', cfg_node)
2418                    self.kb.unresolved_indirect_jumps.add(cfg_node.addr)
2419
2420        return successors
2421
2422    def _backward_slice_indirect(self, cfgnode, sim_successors, current_function_addr):
2423        """
2424        Try to resolve an indirect jump by slicing backwards
2425        """
2426        # TODO: make this a real indirect jump resolver under the new paradigm
2427
2428        irsb = sim_successors.artifacts['irsb']  # shorthand
2429
2430        l.debug("Resolving indirect jump at IRSB %s", irsb)
2431
2432        # Let's slice backwards from the end of this exit
2433        next_tmp = irsb.next.tmp
2434        stmt_id = [i for i, s in enumerate(irsb.statements)
2435                   if isinstance(s, pyvex.IRStmt.WrTmp) and s.tmp == next_tmp][0]
2436
2437        cdg = self.project.analyses.CDG(cfg=self, fail_fast=self._fail_fast)
2438        ddg = self.project.analyses.DDG(cfg=self, start=current_function_addr, call_depth=0, fail_fast=self._fail_fast)
2439
2440        bc = self.project.analyses.BackwardSlice(self,
2441                                                 cdg,
2442                                                 ddg,
2443                                                 targets=[(cfgnode, stmt_id)],
2444                                                 same_function=True,
2445                                                 fail_fast=self._fail_fast)
2446        taint_graph = bc.taint_graph
2447        # Find the correct taint
2448        next_nodes = [cl for cl in taint_graph.nodes() if cl.block_addr == sim_successors.addr]
2449
2450        if not next_nodes:
2451            l.error('The target exit is not included in the slice. Something is wrong')
2452            return []
2453
2454        next_node = next_nodes[0]
2455
2456        # Get the weakly-connected subgraph that contains `next_node`
2457        all_subgraphs = ( networkx.induced_subgraph(taint_graph, nodes) for nodes in networkx.weakly_connected_components(taint_graph))
2458        starts = set()
2459        for subgraph in all_subgraphs:
2460            if next_node in subgraph:
2461                # Make sure there is no symbolic read...
2462                # FIXME: This is an over-approximation. We should try to limit the starts more
2463                nodes = [n for n in subgraph.nodes() if subgraph.in_degree(n) == 0]
2464                for n in nodes:
2465                    starts.add(n.block_addr)
2466
2467        # Execute the slice
2468        successing_addresses = set()
2469        annotated_cfg = bc.annotated_cfg()
2470        for start in starts:
2471            l.debug('Start symbolic execution at 0x%x on program slice.', start)
2472            # Get the state from our CFG
2473            node = self.get_any_node(start)
2474            if node is None:
2475                # Well, we have to live with an empty state
2476                base_state = self.project.factory.blank_state(addr=start)
2477            else:
2478                base_state = node.input_state.copy()
2479                base_state.set_mode('symbolic')
2480                base_state.ip = start
2481
2482                # Clear all initial taints (register values, memory values, etc.)
2483                initial_nodes = [n for n in bc.taint_graph.nodes() if bc.taint_graph.in_degree(n) == 0]
2484                for cl in initial_nodes:
2485                    # Iterate in all actions of this node, and pick corresponding actions
2486                    cfg_nodes = self.get_all_nodes(cl.block_addr)
2487                    for n in cfg_nodes:
2488                        if not n.final_states:
2489                            continue
2490                        actions = [ac for ac in n.final_states[0].history.recent_actions
2491                                   # Normally it's enough to only use the first final state
2492                                   if ac.bbl_addr == cl.block_addr and
2493                                   ac.stmt_idx == cl.stmt_idx
2494                                   ]
2495                        for ac in actions:
2496                            if not hasattr(ac, 'action'):
2497                                continue
2498                            if ac.action == 'read':
2499                                if ac.type == 'mem':
2500                                    unconstrained_value = base_state.solver.Unconstrained('unconstrained',
2501                                                                                      ac.size.ast * 8)
2502                                    base_state.memory.store(ac.addr,
2503                                                            unconstrained_value,
2504                                                            endness=self.project.arch.memory_endness)
2505                                elif ac.type == 'reg':
2506                                    unconstrained_value = base_state.solver.Unconstrained('unconstrained',
2507                                                                                      ac.size.ast * 8)
2508                                    base_state.registers.store(ac.offset,
2509                                                               unconstrained_value,
2510                                                               endness=self.project.arch.register_endness)
2511
2512                # Clear the constraints!
2513                base_state.release_plugin('solver')
2514
2515            # For speed concerns, we are limiting the timeout for z3 solver to 5 seconds
2516            base_state.solver._solver.timeout = 5000
2517
2518            sc = self.project.factory.simulation_manager(base_state)
2519            sc.use_technique(Slicecutor(annotated_cfg))
2520            sc.use_technique(LoopSeer(bound=1))
2521            sc.run()
2522
2523            if sc.cut or sc.deadended:
2524                all_deadended_states = sc.cut + sc.deadended
2525                for s in all_deadended_states:
2526                    if s.addr == sim_successors.addr:
2527                        # We want to get its successors
2528                        succs = s.step()
2529                        for succ in succs.flat_successors:
2530                            successing_addresses.add(succ.addr)
2531
2532            else:
2533                l.debug("Cannot determine the exit. You need some better ways to recover the exits :-(")
2534
2535        l.debug('Resolution is done, and we have %d new successors.', len(successing_addresses))
2536
2537        return list(successing_addresses)
2538
2539    def _symbolically_back_traverse(self, current_block, block_artifacts, cfg_node):
2540        """
2541        Symbolically executes from ancestor nodes (2-5 times removed) finding paths of execution through the given
2542        CFGNode.
2543
2544        :param SimSuccessors current_block: SimSuccessor with address to attempt to navigate to.
2545        :param dict block_artifacts:  Container of IRSB data - specifically used for known persistant register values.
2546        :param CFGNode cfg_node:      Current node interested around.
2547        :returns:                     Double checked concrete successors.
2548        :rtype: List
2549        """
2550        class register_protector:
2551            def __init__(self, reg_offset, info_collection):
2552                """
2553                Class to overwrite registers.
2554
2555                :param int reg_offest:       Register offset to overwrite from.
2556                :param dict info_collection: New register offsets to use (in container).
2557                """
2558                self._reg_offset = reg_offset
2559                self._info_collection = info_collection
2560
2561            def write_persistent_register(self, state_):
2562                """
2563                Writes over given registers from self._info_coollection (taken from block_artifacts)
2564
2565                :param SimSuccessors state_: state to update registers for
2566                """
2567                if state_.inspect.address is None:
2568                    l.error('state.inspect.address is None. It will be fixed by Yan later.')
2569                    return
2570
2571                if state_.registers.load(self._reg_offset).symbolic:
2572                    current_run = state_.inspect.address
2573                    if current_run in self._info_collection and \
2574                            not state_.solver.symbolic(self._info_collection[current_run][self._reg_offset]):
2575                        l.debug("Overwriting %s with %s", state_.registers.load(self._reg_offset),
2576                                self._info_collection[current_run][self._reg_offset])
2577                        state_.registers.store(
2578                            self._reg_offset,
2579                            self._info_collection[current_run][self._reg_offset]
2580                        )
2581
2582        l.debug("Start back traversal from %s", current_block)
2583
2584        # Create a partial CFG first
2585        temp_cfg = networkx.DiGraph(self.graph)
2586        # Reverse it
2587        temp_cfg.reverse(copy=False)
2588
2589        path_length = 0
2590        concrete_exits = []
2591        if cfg_node not in temp_cfg.nodes():
2592            # TODO: Figure out why this is happening
2593            return concrete_exits
2594
2595        keep_running = True
2596        while not concrete_exits and path_length < 5 and keep_running:
2597            path_length += 1
2598            queue = [cfg_node]
2599            avoid = set()
2600            for _ in range(path_length):
2601                new_queue = []
2602                for n in queue:
2603                    successors = list(temp_cfg.successors(n))
2604                    for suc in successors:
2605                        jk = temp_cfg.get_edge_data(n, suc)['jumpkind']
2606                        if jk != 'Ijk_Ret':
2607                            # We don't want to trace into libraries
2608                            predecessors = list(temp_cfg.predecessors(suc))
2609                            avoid |= {p.addr for p in predecessors if p is not n}
2610                            new_queue.append(suc)
2611                queue = new_queue
2612
2613            if path_length <= 1:
2614                continue
2615
2616            for n in queue:
2617                # Start symbolic exploration from each block
2618                state = self.project.factory.blank_state(addr=n.addr,
2619                                                         mode='symbolic',
2620                                                         add_options={
2621                                                                         o.DO_RET_EMULATION,
2622                                                                         o.CONSERVATIVE_READ_STRATEGY,
2623                                                                     } | o.resilience
2624                                                         )
2625                # Avoid concretization of any symbolic read address that is over a certain limit
2626                # TODO: test case is needed for this option
2627
2628                # Set initial values of persistent regs
2629                if n.addr in block_artifacts:
2630                    for reg in state.arch.persistent_regs:
2631                        state.registers.store(reg, block_artifacts[n.addr][reg])
2632                for reg in state.arch.persistent_regs:
2633                    reg_protector = register_protector(reg, block_artifacts)
2634                    state.inspect.add_breakpoint('reg_write',
2635                                                 BP(
2636                                                     BP_AFTER,
2637                                                     reg_write_offset=state.arch.registers[reg][0],
2638                                                     action=reg_protector.write_persistent_register
2639                                                 )
2640                                                 )
2641                simgr = self.project.factory.simulation_manager(state)
2642                simgr.use_technique(LoopSeer(bound=10))
2643                simgr.use_technique(Explorer(find=current_block.addr, avoid=avoid))
2644                simgr.use_technique(LengthLimiter(path_length))
2645                simgr.run()
2646
2647                if simgr.found:
2648                    simgr = self.project.factory.simulation_manager(simgr.one_found, save_unsat=True)
2649                    simgr.step()
2650                    if simgr.active or simgr.unsat:
2651                        keep_running = False
2652                        concrete_exits.extend(simgr.active)
2653                        concrete_exits.extend(simgr.unsat)
2654                if keep_running:
2655                    l.debug('Step back for one more run...')
2656
2657        # Make sure these successors are actually concrete
2658        # We just use the ip, persistent registers, and jumpkind to initialize the original unsat state
2659        # TODO: It works for jumptables, but not for calls. We should also handle changes in sp
2660        new_concrete_successors = []
2661        for c in concrete_exits:
2662            unsat_state = current_block.unsat_successors[0].copy()
2663            unsat_state.history.jumpkind = c.history.jumpkind
2664            for reg in unsat_state.arch.persistent_regs + ['ip']:
2665                unsat_state.registers.store(reg, c.registers.load(reg))
2666            new_concrete_successors.append(unsat_state)
2667
2668        return new_concrete_successors
2669
2670    def _get_symbolic_function_initial_state(self, function_addr, fastpath_mode_state=None):
2671        """
2672        Symbolically execute the first basic block of the specified function,
2673        then returns it. We prepares the state using the already existing
2674        state in fastpath mode (if avaiable).
2675        :param function_addr: The function address
2676        :return: A symbolic state if succeeded, None otherwise
2677        """
2678        if function_addr is None:
2679            return None
2680
2681        if function_addr in self._symbolic_function_initial_state:
2682            return self._symbolic_function_initial_state[function_addr]
2683
2684        if fastpath_mode_state is not None:
2685            fastpath_state = fastpath_mode_state
2686        else:
2687            if function_addr in self._function_input_states:
2688                fastpath_state = self._function_input_states[function_addr]
2689            else:
2690                raise AngrCFGError('The impossible happened. Please report to Fish.')
2691
2692        symbolic_initial_state = self.project.factory.entry_state(mode='symbolic')
2693        if fastpath_state is not None:
2694            symbolic_initial_state = self.project.simos.prepare_call_state(fastpath_state,
2695                                                                            initial_state=symbolic_initial_state)
2696
2697        # Find number of instructions of start block
2698        func = self.project.kb.functions.get(function_addr)
2699        start_block = func._get_block(function_addr)
2700        num_instr = start_block.instructions - 1
2701
2702        symbolic_initial_state.ip = function_addr
2703        path = self.project.factory.path(symbolic_initial_state)
2704        try:
2705            sim_successors = self.project.factory.successors(path.state, num_inst=num_instr)
2706        except (SimError, AngrError):
2707            return None
2708
2709        # We execute all but the last instruction in this basic block, so we have a cleaner
2710        # state
2711        # Start execution!
2712        exits = sim_successors.flat_successors + sim_successors.unsat_successors
2713
2714        if exits:
2715            final_st = None
2716            for ex in exits:
2717                if ex.satisfiable():
2718                    final_st = ex
2719                    break
2720        else:
2721            final_st = None
2722
2723        self._symbolic_function_initial_state[function_addr] = final_st
2724
2725        return final_st
2726
2727    # Private methods - function hints
2728
2729    def _search_for_function_hints(self, successor_state):
2730        """
2731        Scan for constants that might be used as exit targets later, and add them into pending_exits.
2732
2733        :param SimState successor_state: A successing state.
2734        :return:                                 A list of discovered code addresses.
2735        :rtype:                                  list
2736        """
2737
2738        function_hints = []
2739
2740        for action in successor_state.history.recent_actions:
2741            if action.type == 'reg' and action.offset == self.project.arch.ip_offset:
2742                # Skip all accesses to IP registers
2743                continue
2744            elif action.type == 'exit':
2745                # only consider read/write actions
2746                continue
2747
2748            # Enumerate actions
2749            if isinstance(action, SimActionData):
2750                data = action.data
2751                if data is not None:
2752                    # TODO: Check if there is a proper way to tell whether this const falls in the range of code
2753                    # TODO: segments
2754                    # Now let's live with this big hack...
2755                    try:
2756                        const = successor_state.solver.eval_one(data.ast)
2757                    except:  # pylint: disable=bare-except
2758                        continue
2759
2760                    if self._is_address_executable(const):
2761                        if self._pending_function_hints is not None and const in self._pending_function_hints:
2762                            continue
2763
2764                        # target = const
2765                        # tpl = (None, None, target)
2766                        # st = self.project._simos.prepare_call_state(self.project.initial_state(mode='fastpath'),
2767                        #                                           initial_state=saved_state)
2768                        # st = self.project.initial_state(mode='fastpath')
2769                        # exits[tpl] = (st, None, None)
2770
2771                        function_hints.append(const)
2772
2773        l.debug('Got %d possible exits, including: %s', len(function_hints),
2774               ", ".join(["0x%x" % f for f in function_hints])
2775               )
2776
2777        return function_hints
2778
2779    # Private methods - creation of stuff (SimSuccessors, CFGNode, call-stack, etc.)
2780
2781    def _get_simsuccessors(self, addr, job, current_function_addr=None):
2782        """
2783        Create the SimSuccessors instance for a block.
2784
2785        :param int addr:                  Address of the block.
2786        :param CFGJob job:                The CFG job instance with an input state inside.
2787        :param int current_function_addr: Address of the current function.
2788        :return:                          A SimSuccessors instance
2789        :rtype:                           SimSuccessors
2790        """
2791
2792        exception_info = None
2793        state = job.state
2794        saved_state = job.state  # We don't have to make a copy here
2795
2796        # respect the basic block size from base graph
2797        block_size = None
2798        if self._base_graph is not None:
2799            for n in self._base_graph.nodes():
2800                if n.addr == addr:
2801                    block_size = n.size
2802                    break
2803
2804        try:
2805            sim_successors = None
2806
2807            if not self._keep_state:
2808                if self.project.is_hooked(addr):
2809                    old_proc = self.project._sim_procedures[addr]
2810                    is_continuation = old_proc.is_continuation
2811                elif self.project.simos.is_syscall_addr(addr):
2812                    old_proc = self.project.simos.syscall_from_addr(addr)
2813                    is_continuation = False  # syscalls don't support continuation
2814                else:
2815                    old_proc = None
2816                    is_continuation = None
2817
2818                if old_proc is not None and \
2819                        not is_continuation and \
2820                        not old_proc.ADDS_EXITS and \
2821                        not old_proc.NO_RET:
2822                    # DON'T CREATE USELESS SIMPROCEDURES if we don't care about the accuracy of states
2823                    # When generating CFG, a SimProcedure will not be created as it is but be created as a
2824                    # ReturnUnconstrained stub if it satisfies the following conditions:
2825                    # - It doesn't add any new exits.
2826                    # - It returns as normal.
2827                    # In this way, we can speed up the CFG generation by quite a lot as we avoid simulating
2828                    # those functions like read() and puts(), which has no impact on the overall control flow at all.
2829                    #
2830                    # Special notes about SimProcedure continuation: Any SimProcedure instance that is a continuation
2831                    # will add new exits, otherwise the original SimProcedure wouldn't have been executed anyway. Hence
2832                    # it's reasonable for us to always simulate a SimProcedure with continuation.
2833
2834                    old_name = None
2835
2836                    if old_proc.is_syscall:
2837                        new_stub = SIM_PROCEDURES["stubs"]["syscall"]
2838                        ret_to = state.regs.ip_at_syscall
2839                    else:
2840                        # normal SimProcedures
2841                        new_stub = SIM_PROCEDURES["stubs"]["ReturnUnconstrained"]
2842                        ret_to = None
2843
2844                    old_name = old_proc.display_name
2845
2846                    # instantiate the stub
2847                    new_stub_inst = new_stub(display_name=old_name)
2848
2849                    sim_successors = self.project.factory.procedure_engine.process(
2850                        state,
2851                        procedure=new_stub_inst,
2852                        force_addr=addr,
2853                        ret_to=ret_to,
2854                    )
2855
2856            if sim_successors is None:
2857                jumpkind = state.history.jumpkind
2858                jumpkind = 'Ijk_Boring' if jumpkind is None else jumpkind
2859                sim_successors = self.project.factory.successors(
2860                        state,
2861                        jumpkind=jumpkind,
2862                        size=block_size,
2863                        opt_level=self._iropt_level)
2864
2865        except (SimFastPathError, SimSolverModeError) as ex:
2866
2867            if saved_state.mode == 'fastpath':
2868                # Got a SimFastPathError or SimSolverModeError in FastPath mode.
2869                # We wanna switch to symbolic mode for current IRSB.
2870                l.debug('Switch to symbolic mode for address %#x', addr)
2871                # Make a copy of the current 'fastpath' state
2872
2873                l.debug('Symbolic jumps at basic block %#x.', addr)
2874
2875                new_state = None
2876                if addr != current_function_addr:
2877                    new_state = self._get_symbolic_function_initial_state(current_function_addr)
2878
2879                if new_state is None:
2880                    new_state = state.copy()
2881                    new_state.set_mode('symbolic')
2882                new_state.options.add(o.DO_RET_EMULATION)
2883                # Remove bad constraints
2884                # FIXME: This is so hackish...
2885                new_state.solver._solver.constraints = [c for c in new_state.solver.constraints if
2886                                                    c.op != 'BoolV' or c.args[0] is not False]
2887                new_state.solver._solver._result = None
2888                # Swap them
2889                saved_state, job.state = job.state, new_state
2890                sim_successors, exception_info, _ = self._get_simsuccessors(addr, job)
2891
2892            else:
2893                exception_info = sys.exc_info()
2894                # Got a SimSolverModeError in symbolic mode. We are screwed.
2895                # Skip this IRSB
2896                l.debug("Caught a SimIRSBError %s. Don't panic, this is usually expected.", ex)
2897                inst = SIM_PROCEDURES["stubs"]["PathTerminator"]()
2898                sim_successors = ProcedureEngine().process(state, procedure=inst)
2899
2900        except SimIRSBError:
2901            exception_info = sys.exc_info()
2902            # It's a tragedy that we came across some instructions that VEX
2903            # does not support. I'll create a terminating stub there
2904            l.debug("Caught a SimIRSBError during CFG recovery. Creating a PathTerminator.", exc_info=True)
2905            inst = SIM_PROCEDURES["stubs"]["PathTerminator"]()
2906            sim_successors = ProcedureEngine().process(state, procedure=inst)
2907
2908        except claripy.ClaripyError:
2909            exception_info = sys.exc_info()
2910            l.debug("Caught a ClaripyError during CFG recovery. Don't panic, this is usually expected.", exc_info=True)
2911            # Generate a PathTerminator to terminate the current path
2912            inst = SIM_PROCEDURES["stubs"]["PathTerminator"]()
2913            sim_successors = ProcedureEngine().process(state, procedure=inst)
2914
2915        except SimError:
2916            exception_info = sys.exc_info()
2917            l.debug("Caught a SimError during CFG recovery. Don't panic, this is usually expected.", exc_info=True)
2918            # Generate a PathTerminator to terminate the current path
2919            inst = SIM_PROCEDURES["stubs"]["PathTerminator"]()
2920            sim_successors = ProcedureEngine().process(state, procedure=inst)
2921
2922        except AngrExitError as ex:
2923            exception_info = sys.exc_info()
2924            l.debug("Caught a AngrExitError during CFG recovery. Don't panic, this is usually expected.", exc_info=True)
2925            # Generate a PathTerminator to terminate the current path
2926            inst = SIM_PROCEDURES["stubs"]["PathTerminator"]()
2927            sim_successors = ProcedureEngine().process(state, procedure=inst)
2928
2929        except AngrError:
2930            exception_info = sys.exc_info()
2931            section = self.project.loader.main_object.find_section_containing(addr)
2932            if section is None:
2933                sec_name = 'No section'
2934            else:
2935                sec_name = section.name
2936            # AngrError shouldn't really happen though
2937            l.debug("Caught an AngrError during CFG recovery at %#x (%s)",
2938                    addr, sec_name, exc_info=True)
2939            # We might be on a wrong branch, and is likely to encounter the
2940            # "No bytes in memory xxx" exception
2941            # Just ignore it
2942            sim_successors = None
2943
2944        return sim_successors, exception_info, saved_state
2945
2946    def _create_new_call_stack(self, addr, all_jobs, job, exit_target, jumpkind):
2947        """
2948        Creates a new call stack, and according to the jumpkind performs appropriate actions.
2949
2950        :param int addr:                          Address to create at.
2951        :param Simsuccessors all_jobs:            Jobs to get stack pointer from or return address.
2952        :param CFGJob job:                        CFGJob to copy current call stack from.
2953        :param int exit_target:                   Address of exit target.
2954        :param str jumpkind:                      The type of jump performed.
2955        :returns:                                 New call stack for target block.
2956        :rtype:                                   CallStack
2957        """
2958
2959        if self._is_call_jumpkind(jumpkind):
2960            new_call_stack = job.call_stack_copy()
2961            # Notice that in ARM, there are some freaking instructions
2962            # like
2963            # BLEQ <address>
2964            # It should give us three exits: Ijk_Call, Ijk_Boring, and
2965            # Ijk_Ret. The last exit is simulated.
2966            # Notice: We assume the last exit is the simulated one
2967            if len(all_jobs) > 1 and all_jobs[-1].history.jumpkind == "Ijk_FakeRet":
2968                se = all_jobs[-1].solver
2969                retn_target_addr = se.eval_one(all_jobs[-1].ip, default=0)
2970                sp = se.eval_one(all_jobs[-1].regs.sp, default=0)
2971
2972                new_call_stack = new_call_stack.call(addr, exit_target,
2973                                    retn_target=retn_target_addr,
2974                                    stack_pointer=sp)
2975
2976            elif jumpkind.startswith('Ijk_Sys') and len(all_jobs) == 1:
2977                # This is a syscall. It returns to the same address as itself (with a different jumpkind)
2978                retn_target_addr = exit_target
2979                se = all_jobs[0].solver
2980                sp = se.eval_one(all_jobs[0].regs.sp, default=0)
2981                new_call_stack = new_call_stack.call(addr, exit_target,
2982                                    retn_target=retn_target_addr,
2983                                    stack_pointer=sp)
2984
2985            else:
2986                # We don't have a fake return exit available, which means
2987                # this call doesn't return.
2988                new_call_stack = CallStack()
2989                se = all_jobs[-1].solver
2990                sp = se.eval_one(all_jobs[-1].regs.sp, default=0)
2991
2992                new_call_stack = new_call_stack.call(addr, exit_target, retn_target=None, stack_pointer=sp)
2993
2994        elif jumpkind == "Ijk_Ret":
2995            # Normal return
2996            new_call_stack = job.call_stack_copy()
2997            try:
2998                new_call_stack = new_call_stack.ret(exit_target)
2999            except SimEmptyCallStackError:
3000                pass
3001
3002            se = all_jobs[-1].solver
3003            sp = se.eval_one(all_jobs[-1].regs.sp, default=0)
3004            old_sp = job.current_stack_pointer
3005
3006            # Calculate the delta of stack pointer
3007            if sp is not None and old_sp is not None:
3008                delta = sp - old_sp
3009                func_addr = job.func_addr
3010
3011                if self.kb.functions.function(func_addr) is None:
3012                    # Create the function if it doesn't exist
3013                    # FIXME: But hell, why doesn't it exist in the first place?
3014                    l.error("Function 0x%x doesn't exist in function manager although it should be there."
3015                            "Look into this issue later.",
3016                            func_addr)
3017                    self.kb.functions.function(func_addr, create=True)
3018
3019                # Set sp_delta of the function
3020                self.kb.functions.function(func_addr, create=True).sp_delta = delta
3021
3022        elif jumpkind == 'Ijk_FakeRet':
3023            # The fake return...
3024            new_call_stack = job.call_stack
3025
3026        else:
3027            # although the jumpkind is not Ijk_Call, it may still jump to a new function... let's see
3028            if self.project.is_hooked(exit_target):
3029                hooker = self.project.hooked_by(exit_target)
3030                if not hooker is procedures.stubs.UserHook.UserHook:
3031                    # if it's not a UserHook, it must be a function
3032                    # Update the function address of the most recent call stack frame
3033                    new_call_stack = job.call_stack_copy()
3034                    new_call_stack.current_function_address = exit_target
3035
3036                else:
3037                    # TODO: We need a way to mark if a user hook is a function or not
3038                    # TODO: We can add a map in Project to store this information
3039                    # For now we are just assuming they are not functions, which is mostly the case
3040                    new_call_stack = job.call_stack
3041
3042            else:
3043                # Normal control flow transition
3044                new_call_stack = job.call_stack
3045
3046        return new_call_stack
3047
3048    def _create_cfgnode(self, sim_successors, call_stack, func_addr, block_id=None, depth=None, exception_info=None):
3049        """
3050        Create a context-sensitive CFGNode instance for a specific block.
3051
3052        :param SimSuccessors sim_successors:         The SimSuccessors object.
3053        :param CallStack call_stack_suffix:          The call stack.
3054        :param int func_addr:                        Address of the current function.
3055        :param BlockID block_id:                     The block ID of this CFGNode.
3056        :param int or None depth:                    Depth of this CFGNode.
3057        :return:                                     A CFGNode instance.
3058        :rtype:                                      CFGNode
3059        """
3060
3061        sa = sim_successors.artifacts  # shorthand
3062
3063        # Determine if this is a SimProcedure, and further, if this is a syscall
3064        syscall = None
3065        is_syscall = False
3066        if sim_successors.sort == 'SimProcedure':
3067            is_simprocedure = True
3068            if sa['is_syscall'] is True:
3069                is_syscall = True
3070                syscall = sim_successors.artifacts['procedure']
3071        else:
3072            is_simprocedure = False
3073
3074        if is_simprocedure:
3075            simproc_name = sa['name'].split('.')[-1]
3076            if simproc_name == "ReturnUnconstrained" and 'resolves' in sa and sa['resolves'] is not None:
3077                simproc_name = sa['resolves']
3078
3079            no_ret = False
3080            if syscall is not None and sa['no_ret']:
3081                no_ret = True
3082
3083            cfg_node = CFGENode(sim_successors.addr,
3084                                None,
3085                                self.model,
3086                                callstack_key=call_stack.stack_suffix(self.context_sensitivity_level),
3087                                input_state=None,
3088                                simprocedure_name=simproc_name,
3089                                syscall_name=syscall,
3090                                no_ret=no_ret,
3091                                is_syscall=is_syscall,
3092                                function_address=sim_successors.addr,
3093                                block_id=block_id,
3094                                depth=depth,
3095                                creation_failure_info=exception_info,
3096                                thumb=(isinstance(self.project.arch, ArchARM) and sim_successors.addr & 1),
3097                                )
3098
3099        else:
3100            cfg_node = CFGENode(sim_successors.addr,
3101                                sa['irsb_size'],
3102                                self.model,
3103                                callstack_key=call_stack.stack_suffix(self.context_sensitivity_level),
3104                                input_state=None,
3105                                is_syscall=is_syscall,
3106                                function_address=func_addr,
3107                                block_id=block_id,
3108                                depth=depth,
3109                                irsb=sim_successors.artifacts['irsb'],
3110                                creation_failure_info=exception_info,
3111                                thumb=(isinstance(self.project.arch, ArchARM) and sim_successors.addr & 1),
3112                                )
3113
3114        return cfg_node
3115
3116    # Private methods - loops and graph normalization
3117
3118    def _detect_loops(self, loop_callback=None):
3119        """
3120        Loop detection.
3121
3122        :param func loop_callback: A callback function for each detected loop backedge.
3123        :return: None
3124        """
3125
3126        loop_finder = self.project.analyses.LoopFinder(kb=self.kb, normalize=False, fail_fast=self._fail_fast)
3127
3128        if loop_callback is not None:
3129            graph_copy = networkx.DiGraph(self._graph)
3130
3131            for loop in loop_finder.loops:  # type: angr.analyses.loopfinder.Loop
3132                loop_callback(graph_copy, loop)
3133
3134            self.model.graph = graph_copy
3135
3136        # Update loop backedges and graph
3137        self._loop_back_edges = list(itertools.chain.from_iterable(loop.continue_edges for loop in loop_finder.loops))
3138
3139    # Private methods - function/procedure/subroutine analysis
3140    # Including calling convention, function arguments, etc.
3141
3142    def _refine_function_arguments(self, func, callsites):
3143        """
3144
3145        :param func:
3146        :param callsites:
3147        :return:
3148        """
3149
3150        for i, c in enumerate(callsites):
3151            # Execute one block ahead of the callsite, and execute one basic block after the callsite
3152            # In this process, the following tasks are performed:
3153            # - Record registers/stack variables that are modified
3154            # - Record the change of the stack pointer
3155            # - Check if the return value is used immediately
3156            # We assume that the stack is balanced before and after the call (you can have caller clean-up of course).
3157            # Any abnormal behaviors will be logged.
3158            # Hopefully this approach will allow us to have a better understanding of parameters of those function
3159            # stubs and function proxies.
3160
3161            if c.simprocedure_name is not None:
3162                # Skip all SimProcedures
3163                continue
3164
3165            l.debug("Refining %s at 0x%x (%d/%d).", repr(func), c.addr, i, len(callsites))
3166
3167            # Get a basic block ahead of the callsite
3168            blocks_ahead = [c]
3169
3170            # the block after
3171            blocks_after = []
3172            successors = self.get_successors_and_jumpkind(c, excluding_fakeret=False)
3173            for s, jk in successors:
3174                if jk == 'Ijk_FakeRet':
3175                    blocks_after = [s]
3176                    break
3177
3178            regs_overwritten = set()
3179            stack_overwritten = set()
3180            regs_read = set()
3181            regs_written = set()
3182
3183            try:
3184                # Execute the predecessor
3185                path = self.project.factory.path(
3186                    self.project.factory.blank_state(mode="fastpath", addr=blocks_ahead[0].addr))
3187                path.step()
3188                all_successors = path.next_run.successors + path.next_run.unsat_successors
3189                if not all_successors:
3190                    continue
3191
3192                suc = all_successors[0]
3193                se = suc.solver
3194                # Examine the path log
3195                actions = suc.history.recent_actions
3196                sp = se.eval_one(suc.regs.sp, default=0) + self.project.arch.call_sp_fix
3197                for ac in actions:
3198                    if ac.type == "reg" and ac.action == "write":
3199                        regs_overwritten.add(ac.offset)
3200                    elif ac.type == "mem" and ac.action == "write":
3201                        addr = se.eval_one(ac.addr.ast, default=0)
3202                        if (self.project.arch.call_pushes_ret and addr >= sp + self.project.arch.bytes) or \
3203                                (not self.project.arch.call_pushes_ret and addr >= sp):
3204                            offset = addr - sp
3205                            stack_overwritten.add(offset)
3206
3207                func.prepared_registers.add(tuple(regs_overwritten))
3208                func.prepared_stack_variables.add(tuple(stack_overwritten))
3209
3210            except (SimError, AngrError):
3211                pass
3212
3213            try:
3214                if blocks_after:
3215                    path = self.project.factory.path(
3216                        self.project.factory.blank_state(mode="fastpath", addr=blocks_after[0].addr))
3217                    path.step()
3218                    all_successors = path.next_run.successors + path.next_run.unsat_successors
3219                    if not all_successors:
3220                        continue
3221
3222                    suc = all_successors[0]
3223                    actions = suc.history.recent_actions
3224                    for ac in actions:
3225                        if ac.type == "reg" and ac.action == "read" and ac.offset not in regs_written:
3226                            regs_read.add(ac.offset)
3227                        elif ac.type == "reg" and ac.action == "write":
3228                            regs_written.add(ac.offset)
3229
3230                    # Filter registers, remove unnecessary registers from the set
3231                    # regs_overwritten = self.project.arch.argument_registers.intersection(regs_overwritten)
3232                    regs_read = self.project.arch.argument_registers.intersection(regs_read)
3233
3234                    func.registers_read_afterwards.add(tuple(regs_read))
3235
3236            except (SimError, AngrError):
3237                pass
3238
3239    # Private methods - dominators and post-dominators
3240
3241    def _immediate_dominators(self, node, target_graph=None, reverse_graph=False):
3242        """
3243        Get all immediate dominators of sub graph from given node upwards.
3244
3245        :param str node: id of the node to navigate forwards from.
3246        :param networkx.classes.digraph.DiGraph target_graph: graph to analyse, default is self.graph.
3247        :param bool reverse_graph: Whether the target graph should be reversed before analysation.
3248
3249        :return: each node of graph as index values, with element as respective node's immediate dominator.
3250        :rtype: dict
3251        """
3252        if target_graph is None:
3253            target_graph = self.graph
3254
3255        if node not in target_graph:
3256            raise AngrCFGError('Target node %s is not in graph.' % node)
3257
3258        graph = networkx.DiGraph(target_graph)
3259        if reverse_graph:
3260            # Reverse the graph without deepcopy
3261            for n in target_graph.nodes():
3262                graph.add_node(n)
3263            for src, dst in target_graph.edges():
3264                graph.add_edge(dst, src)
3265
3266        idom = {node: node}
3267
3268        order = list(networkx.dfs_postorder_nodes(graph, node))
3269        dfn = {u: i for i, u in enumerate(order)}
3270        order.pop()
3271        order.reverse()
3272
3273        def intersect(u_, v_):
3274            """
3275            Finds the highest (in postorder valuing) point of intersection above two node arguments.
3276
3277            :param str u_: nx node id.
3278            :param str v_: nx node id.
3279            :return: intersection of paths.
3280            :rtype: str
3281            """
3282            while u_ != v_:
3283                while dfn[u_] < dfn[v_]:
3284                    u_ = idom[u_]
3285                while dfn[u_] > dfn[v_]:
3286                    v_ = idom[v_]
3287            return u_
3288
3289        changed = True
3290        while changed:
3291            changed = False
3292            for u in order:
3293                new_idom = reduce(intersect, (v for v in graph.pred[u] if v in idom))
3294                if u not in idom or idom[u] != new_idom:
3295                    idom[u] = new_idom
3296                    changed = True
3297
3298        return idom
3299
3300    #
3301    # Static private utility methods
3302    #
3303
3304    @staticmethod
3305    def _is_indirect_jump(_, sim_successors):
3306        """
3307        Determine if this SimIRSB has an indirect jump as its exit
3308        """
3309
3310        if sim_successors.artifacts['irsb_direct_next']:
3311            # It's a direct jump
3312            return False
3313
3314        default_jumpkind = sim_successors.artifacts['irsb_default_jumpkind']
3315        if default_jumpkind not in ('Ijk_Call', 'Ijk_Boring', 'Ijk_InvalICache'):
3316            # It's something else, like a ret of a syscall... we don't care about it
3317            return False
3318
3319        return True
3320
3321    @staticmethod
3322    def _generate_block_id(call_stack_suffix, block_addr, is_syscall):
3323        if not is_syscall:
3324            return BlockID.new(block_addr, call_stack_suffix, 'normal')
3325        return BlockID.new(block_addr, call_stack_suffix, 'syscall')
3326
3327    @staticmethod
3328    def _block_id_repr(block_id):
3329        return repr(block_id)
3330
3331    @staticmethod
3332    def _block_id_callstack_key(block_id):
3333        return block_id.callsite_tuples
3334
3335    @staticmethod
3336    def _block_id_addr(block_id):
3337        return block_id.addr
3338
3339    @staticmethod
3340    def _block_id_current_func_addr(block_id):
3341        """
3342        If we don't have any information about the caller, we have no way to get the address of the current function.
3343
3344        :param BlockID block_id: The block ID.
3345        :return:                 The function address if there is one, or None if it's not possible to get.
3346        :rtype:                  int or None
3347        """
3348
3349        if block_id.callsite_tuples:
3350            return block_id.callsite_tuples[-1]
3351        return None
3352
3353    #
3354    # Other private utility methods
3355    #
3356
3357    @staticmethod
3358    def _is_call_jumpkind(jumpkind):
3359        if jumpkind == 'Ijk_Call' or jumpkind.startswith('Ijk_Sys_'):
3360            return True
3361        return False
3362
3363    def _push_unresolvable_run(self, block_address):
3364        """
3365        Adds this block to the list of unresolvable runs.
3366
3367        :param dict block_address: container of IRSB data from run.
3368        """
3369        self._unresolvable_runs.add(block_address)
3370
3371    def _is_address_executable(self, address):
3372        """
3373        Check if the specific address is in one of the executable ranges.
3374
3375        :param int address: The address
3376        :return: True if it's in an executable range, False otherwise
3377        """
3378
3379        for r in self._executable_address_ranges:
3380            if r[0] <= address < r[1]:
3381                return True
3382        return False
3383
3384    def _update_thumb_addrs(self, simsuccessors, state):
3385        """
3386
3387        :return:
3388        """
3389
3390        # For ARM THUMB mode
3391        if simsuccessors.sort == 'IRSB' and state.thumb:
3392            insn_addrs = simsuccessors.artifacts['insn_addrs']
3393            self._thumb_addrs.update(insn_addrs)
3394            self._thumb_addrs.update(map(lambda x: x + 1, insn_addrs))
3395
3396    def _get_callsites(self, function_address):
3397        """
3398        Get where a specific function is called.
3399        :param function_address:    Address of the target function
3400        :return:                    A list of CFGNodes whose exits include a call/jump to the given function
3401        """
3402
3403        all_predecessors = []
3404
3405        nodes = self.get_all_nodes(function_address)
3406        for n in nodes:
3407            predecessors = list(self.get_predecessors(n))
3408            all_predecessors.extend(predecessors)
3409
3410        return all_predecessors
3411
3412    def _get_nx_paths(self, begin, end):
3413        """
3414        Get the possible (networkx) simple paths between two nodes or addresses
3415        corresponding to nodes.
3416        Input: addresses or node instances
3417        Return: a list of lists of nodes representing paths.
3418        """
3419        if isinstance(begin, int) and isinstance(end, int):
3420            n_begin = self.get_any_node(begin)
3421            n_end = self.get_any_node(end)
3422
3423        elif isinstance(begin, CFGENode) and isinstance(end, CFGENode):
3424            n_begin = begin
3425            n_end = end
3426        else:
3427            raise AngrCFGError("from and to should be of the same type")
3428
3429        self.remove_fakerets()
3430        return networkx.all_shortest_paths(self.graph, n_begin, n_end)
3431
3432    def _quasi_topological_sort(self):
3433        """
3434        Perform a quasi-topological sort on an already constructed CFG graph (a networkx DiGraph)
3435
3436        :return: None
3437        """
3438
3439        # Clear the existing sorting result
3440        self._quasi_topological_order = {}
3441
3442        ctr = self._graph.number_of_nodes()
3443
3444        for ep in self._entry_points:
3445            # FIXME: This is not always correct. We'd better store CFGNodes in self._entry_points
3446            ep_node = self.get_any_node(ep)
3447
3448            if not ep_node:
3449                continue
3450
3451            for n in networkx.dfs_postorder_nodes(self._graph, source=ep_node):
3452                if n not in self._quasi_topological_order:
3453                    self._quasi_topological_order[n] = ctr
3454                    ctr -= 1
3455
3456    def _reset_state_mode(self, state, mode):
3457        """
3458        Reset the state mode to the given mode, and apply the custom state options specified with this analysis.
3459
3460        :param state:    The state to work with.
3461        :param str mode: The state mode.
3462        :return:         None
3463        """
3464
3465        state.set_mode(mode)
3466        state.options |= self._state_add_options
3467        state.options = state.options.difference(self._state_remove_options)
3468
3469from angr.analyses import AnalysesHub
3470AnalysesHub.register_default('CFGEmulated', CFGEmulated)
3471