1import os
2import logging
3import sys
4
5from nose.plugins.attrib import attr
6import nose.tools
7
8import archinfo
9import angr
10
11from angr.analyses.cfg.cfg_fast import SegmentList
12from angr.knowledge_plugins.cfg import CFGNode, CFGModel, MemoryDataSort
13
# Module-level logger for this test module.
l = logging.getLogger("angr.tests.test_cfgfast")

# Directory holding the test binaries, resolved relative to this file: two directory levels up from the
# directory containing this file, then binaries/tests.
test_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'binaries', 'tests')
17
def cfg_fast_functions_check(arch, binary_path, func_addrs, func_features):
    """
    Generate fast CFGs on the given binary under several configurations, and test if all specified functions are
    found each time.

    Three CFGs are built in turn: one with default options, one with `force_segment=True`, and one with both
    `force_segment=True` and `normalize=True`. For every CFG, each address in `func_addrs` must be a known function,
    and each function that has a "returning" entry in `func_features` must have exactly that `returning` value.

    :param str arch: the architecture, will be prepended to `binary_path`
    :param str binary_path: path to the binary under the architecture directory
    :param func_addrs: A collection of function addresses that should be recovered
    :param dict func_features: Maps a function address to a dict of expected features. Only the "returning" key is
                               checked.
    :return: None
    """

    proj = angr.Project(os.path.join(test_location, arch, binary_path), load_options={'auto_load_libs': False})

    # keyword arguments of each CFGFast run, in the order they are exercised
    cfg_options = (
        {},                                          # default settings
        {'force_segment': True},                     # segment only
        {'force_segment': True, 'normalize': True},  # with normalization enabled
    )

    # marker meaning "no expectation on the returning attribute of this function"
    no_expectation = "undefined"

    for options in cfg_options:
        cfg = proj.analyses.CFGFast(**options)
        recovered = set(cfg.kb.functions.keys())
        nose.tools.assert_true(recovered.issuperset(func_addrs))

        for func_addr, feature_dict in func_features.items():
            expected = feature_dict.get("returning", no_expectation)
            if expected == no_expectation:
                continue
            nose.tools.assert_is(cfg.kb.functions.function(addr=func_addr).returning, expected)
57
def cfg_fast_edges_check(arch, binary_path, edges):
    """
    Generate a fast CFG on the given binary, and test if all edges are found.

    For every (source, destination) address pair, both addresses must resolve to a CFG node, and the destination
    node must be among the successors of the source node.

    :param str arch: the architecture, will be prepended to `binary_path`
    :param str binary_path: path to the binary under the architecture directory
    :param edges: an iterable of (source address, destination address) pairs
    :return: None
    """

    proj = angr.Project(os.path.join(test_location, arch, binary_path), load_options={'auto_load_libs': False})
    cfg = proj.analyses.CFGFast()

    for src, dst in edges:
        src_node = cfg.model.get_any_node(src)
        dst_node = cfg.model.get_any_node(dst)

        # both end points must exist before the edge itself can be checked
        for addr, node in ((src, src_node), (dst, dst_node)):
            nose.tools.assert_is_not_none(node, msg="CFG node 0x%x is not found." % addr)

        edge_msg = "CFG edge %s-%s is not found." % (src_node, dst_node)
        nose.tools.assert_in(dst_node, src_node.successors, msg=edge_msg)
81
def test_cfg_0():
    """
    Yield one function-recovery check per architecture for the `cfg_0` binary.
    """

    filename = 'cfg_0'

    # expected function addresses, keyed by architecture
    functions = {
        'x86_64': {
            0x400410,
            0x400420,
            0x400430,
            0x400440,
            0x400470,
            0x40052c,
            0x40053c,
        }
    }

    # no per-function features are checked for this binary
    function_features = {
        'x86_64': {}
    }

    for arch, expected_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, expected_addrs, function_features[arch]
103
def test_cfg_0_pe():
    """
    Yield one function-recovery check per architecture for the `cfg_0_pe` binary.
    """

    filename = 'cfg_0_pe'

    # expected function addresses, keyed by architecture
    functions = {
        'x86_64': {
            # 0x40150a,  # currently angr identifies 0x40150e due to the way _func_addrs_from_prologues() is
                         # implemented. this issue can be resolved with a properly implemented approach like Byte-Weight
            0x4014f0,
        }
    }

    # no per-function features are checked for this binary
    function_features = {
        'x86_64': {}
    }

    for arch, expected_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, expected_addrs, function_features[arch]
121
122
@attr(speed='slow')
def test_busybox():
    """
    Yield one edge check per architecture for the `busybox` binary. Tagged as a slow test.
    """

    filename = "busybox"

    # expected CFG edges as (source, destination) pairs, keyed by architecture
    edges = {
        "mipsel": {
            (0x4091ec, 0x408de0),
            (0x449acc, 0x5003b8),  # call to putenv. address of putenv may change in the future
            (0x467cfc, 0x500014),  # call to free. address of free may change in the future
        }
    }

    for arch in edges:
        yield cfg_fast_edges_check, arch, filename, edges[arch]
136
137
def test_fauxware():
    """
    Yield function-recovery and return-edge checks for the `fauxware` binary, for each listed architecture.
    """

    filename = "fauxware"

    # expected function addresses, keyed by architecture
    functions = {
        'x86_64': {
            0x4004e0,
            0x400510,
            0x400520,
            0x400530,
            0x400540,
            0x400550,
            0x400560,
            0x400570,  # .plt._exit
            0x400580,  # _start
            0x4005ac,
            0x4005d0,
            0x400640,
            0x400664,
            0x4006ed,
            0x4006fd,
            0x40071d,  # main
            0x4007e0,
            0x400870,
            0x400880,
            0x4008b8,
        },
        'mips': {
            0x400534,  # _init
            0x400574,
            0x400598,
            0x4005d0,  # _ftext
            0x4005dc,
            0x400630,  # __do_global_dtors_aux
            0x4006d4,  # frame_dummy
            0x400708,
            0x400710,  # authenticate
            0x400814,  # accepted
            0x400868,  # rejected
            0x4008c0,  # main
            0x400a34,
            0x400a48,  # __libc_csu_init
            0x400af8,
            0x400b00,  # __do_global_ctors_aux
            0x400b58,
            ### plt entries
            0x400b60,  # strcmp
            0x400b70,  # read
            0x400b80,  # printf
            0x400b90,  # puts
            0x400ba0,  # exit
            0x400bb0,  # open
            0x400bc0,  # __libc_start_main
        },
    }

    # expected per-function features, keyed by architecture and then by function address
    function_features = {
        'x86_64': {
            0x400570: {"returning": False},  # plt.exit
            0x4006fd: {"returning": False},  # rejected
        },
        'mips': {
            0x400868: {"returning": False},  # rejected
        },
    }

    # expected return edges as (source, destination) pairs, keyed by architecture
    return_edges = {
        'x86_64': [
            (0x4006fb, 0x4007c7),  # return from accepted to main
        ],
        'mips': [
            (0x40084c, 0x400a04),  # returning edge from accepted to main
        ],
    }

    for arch, expected_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, expected_addrs, function_features[arch]
        yield cfg_fast_edges_check, arch, filename, return_edges[arch]
230
def test_cfg_loop_unrolling():
    """
    Yield one edge check per architecture for the `cfg_loop_unrolling` binary.
    """

    filename = "cfg_loop_unrolling"

    # expected CFG edges as (source, destination) pairs, keyed by architecture
    edges = {
        'x86_64': {
            (0x400658, 0x400636),
            (0x400658, 0x400661),
            (0x400651, 0x400636),
            (0x400651, 0x400661),
        }
    }

    for arch, expected_edges in edges.items():
        yield cfg_fast_edges_check, arch, filename, expected_edges
246
def test_cfg_switches():
    """
    Yield one edge check per architecture for the `cfg_switches` binary.

    The expected edges are the jump-table (switch) edges listed below, including the default cases where noted.
    """

    # Uncomment for debugging:
    #logging.getLogger('angr.analyses.cfg.cfg_fast').setLevel(logging.INFO)
    #logging.getLogger('angr.analyses.cfg.indirect_jump_resolvers.jumptable').setLevel(logging.DEBUG)

    filename = "cfg_switches"

    # expected CFG edges as (source, destination) pairs, keyed by architecture
    edges = {
        'x86_64': {
            # jump table 0 in func_0
            (0x40053a, 0x400547),
            (0x40053a, 0x400552),
            (0x40053a, 0x40055d),
            (0x40053a, 0x400568),
            (0x40053a, 0x400573),
            (0x40053a, 0x400580),
            (0x40053a, 0x40058d),
            # jump table 0 in func_1
            (0x4005bc, 0x4005c9),
            (0x4005bc, 0x4005d8),
            (0x4005bc, 0x4005e7),
            (0x4005bc, 0x4005f6),
            (0x4005bc, 0x400605),
            (0x4005bc, 0x400614),
            (0x4005bc, 0x400623),
            (0x4005bc, 0x400632),
            (0x4005bc, 0x40063e),
            (0x4005bc, 0x40064a),
            (0x4005bc, 0x4006b0),
            # jump table 1 in func_1
            (0x40065a, 0x400667),
            (0x40065a, 0x400673),
            (0x40065a, 0x40067f),
            (0x40065a, 0x40068b),
            (0x40065a, 0x400697),
            (0x40065a, 0x4006a3),
            # jump table 0 in main
            (0x4006e1, 0x4006ee),
            (0x4006e1, 0x4006fa),
            (0x4006e1, 0x40070b),
            (0x4006e1, 0x40071c),
            (0x4006e1, 0x40072d),
            (0x4006e1, 0x40073e),
            (0x4006e1, 0x40074f),
            (0x4006e1, 0x40075b),
        },
        'armel': {
            # jump table 0 in func_0
            (0x10434, 0x10488),
            (0x10434, 0x104e8),
            (0x10434, 0x10498),
            (0x10434, 0x104a8),
            (0x10434, 0x104b8),
            (0x10434, 0x104c8),
            (0x10434, 0x104d8),
            (0x10454, 0x104e8),  # default case
            # jump table 0 in func_1
            (0x10524, 0x105cc),
            (0x10524, 0x106b4),
            (0x10524, 0x105d8),
            (0x10524, 0x105e4),
            (0x10524, 0x105f0),
            (0x10524, 0x105fc),
            (0x10524, 0x10608),
            (0x10524, 0x10614),
            (0x10524, 0x10620),
            (0x10524, 0x1062c),
            (0x10524, 0x10638),
            (0x10534, 0x106b4),  # default case
            # jump table 1 in func_1
            (0x10650, 0x106a4),  # default case
            (0x10640, 0x10668),
            (0x10640, 0x10674),
            (0x10640, 0x10680),
            (0x10640, 0x1068c),
            (0x10640, 0x10698),
            # jump table 0 in main
            (0x10734, 0x107fc),
            (0x10734, 0x10808),
            (0x10734, 0x10818),
            (0x10734, 0x10828),
            (0x10734, 0x10838),
            (0x10734, 0x10848),
            (0x10734, 0x10858),
            (0x10734, 0x10864),
            (0x10744, 0x10864),  # default case
        },
        's390x': {
            # jump table 0 in func_0
            (0x4007d4, 0x4007ea),  # case 1
            (0x4007d4, 0x4007f4),  # case 3
            (0x4007d4, 0x4007fe),  # case 5
            (0x4007d4, 0x400808),  # case 7
            (0x4007d4, 0x400812),  # case 9
            (0x4007d4, 0x40081c),  # case 12
            (0x4007c0, 0x4007ca),  # default case
            # jump table 0 in func_1
            (0x400872, 0x4008ae),  # case 2
            (0x400872, 0x4008be),  # case 10
            (0x400872, 0x4008ce),  # case 12
            (0x400872, 0x4008de),  # case 14
            (0x400872, 0x4008ee),  # case 15
            (0x400872, 0x4008fe),  # case 16
            (0x400872, 0x40090e),  # case 22
            (0x400872, 0x40091e),  # case 24
            (0x400872, 0x40092e),  # case 28
            (0x400872, 0x400888),  # case 38
            (0x400848, 0x400854),  # default case (1)
            (0x400872, 0x400854),  # default case (2)
            # jump table 1 in func_1
            (0x40093e, 0x400984),  # case 1
            (0x40093e, 0x400974),  # case 2
            (0x40093e, 0x400964),  # case 3
            (0x40093e, 0x400954),  # case 4
            (0x40093e, 0x400994),  # case 5
            (0x400898, 0x40089e),  # default case (1)
            # jump table 0 in main
            # case 1, 3, 5, 7, 9: optimized out
            (0x400638, 0x40064e),  # case 2
            (0x400638, 0x400692),  # case 4
            (0x400638, 0x4006a4),  # case 6
            (0x400638, 0x40066e),  # case 8
            (0x400638, 0x400680),  # case 10
            # case 45: optimized out
            (0x40062c, 0x40065c),  # default case
        }
    }

    for arch, expected_edges in edges.items():
        yield cfg_fast_edges_check, arch, filename, expected_edges
379
380
def test_cfg_about_time():
    """
    Test the correctness of the PLT stub removal in CFGBase, using the x86_64 `about_time` binary.
    """

    binary = os.path.join(test_location, "x86_64", "about_time")
    proj = angr.Project(binary, auto_load_libs=False)
    cfg = proj.analyses.CFG()

    # PLT stubs that should be removed
    for stub_addr in (0x401026, 0x4010a6, 0x40115e):
        nose.tools.assert_not_in(stub_addr, cfg.kb.functions)

    # the start function that should not be removed
    nose.tools.assert_in(proj.entry, cfg.kb.functions)
395
396
def test_segment_list_0():
    """
    Two "code" ranges with a gap between them are kept as two separate segments.
    """

    segments = SegmentList()
    for start, size in ((0, 1), (2, 3)):
        segments.occupy(start, size, "code")

    check = nose.tools.assert_equal
    check(len(segments), 2)
    check(segments._list[0].end, 1)
    check(segments._list[1].end, 5)
    # the second segment covers address 4 but not address 5
    check(segments.is_occupied(4), True)
    check(segments.is_occupied(5), False)
407
def test_segment_list_1():
    """
    Two adjacent ranges of the same sort are merged into a single segment.
    """

    segments = SegmentList()

    # They should be merged
    for start, size in ((0, 1), (1, 2)):
        segments.occupy(start, size, "code")

    check = nose.tools.assert_equal
    check(len(segments), 1)
    check(segments._list[0].start, 0)
    check(segments._list[0].end, 3)
418
def test_segment_list_2():
    """
    Two adjacent ranges of different sorts are not merged.
    """

    segments = SegmentList()

    # They should not be merged
    for start, size, sort in ((0, 1, "code"), (1, 2, "data")):
        segments.occupy(start, size, sort)

    check = nose.tools.assert_equal
    check(len(segments), 2)

    # expected (start, end) of each segment, in list order
    for index, (start, end) in enumerate(((0, 1), (1, 3))):
        check(segments._list[index].start, start)
        check(segments._list[index].end, end)
431
def test_segment_list_3():
    """
    A "data" range placed inside a merged "code" range splits it, giving three segments.
    """

    segments = SegmentList()

    # They should be merged, and create three different segments
    for start, size, sort in ((0, 5, "code"), (5, 5, "code"), (1, 2, "data")):
        segments.occupy(start, size, sort)

    check = nose.tools.assert_equal
    check(len(segments), 3)

    # expected (start, end, sort) of each segment, in list order
    expected = (
        (0, 1, "code"),
        (1, 3, "data"),
        (3, 10, "code"),
    )
    for index, (start, end, sort) in enumerate(expected):
        check(segments._list[index].start, start)
        check(segments._list[index].end, end)
        check(segments._list[index].sort, sort)
453
def test_segment_list_4():
    """
    Same-sort ranges occupied from high to low addresses end up merged into one segment.
    """

    segments = SegmentList()
    for start, size in ((5, 5), (4, 1), (2, 2)):
        segments.occupy(start, size, "code")

    check = nose.tools.assert_equal
    check(len(segments), 1)
    check(segments._list[0].start, 2)
    check(segments._list[0].end, 10)
464
def test_segment_list_5():
    """
    Re-occupying a "code" segment as "data" merges it with its "data" neighbours on both sides.
    """

    segments = SegmentList()
    check = nose.tools.assert_equal

    for start, size, sort in ((5, 5, "data"), (4, 1, "code"), (2, 2, "data")):
        segments.occupy(start, size, sort)

    # data / code / data: three segments spanning 2 to 10
    check(len(segments), 3)
    check(segments._list[0].start, 2)
    check(segments._list[2].end, 10)

    # overwrite the "code" byte in the middle with "data"
    segments.occupy(3, 2, "data")

    check(len(segments), 1)
    check(segments._list[0].start, 2)
    check(segments._list[0].end, 10)
481
def test_segment_list_6():
    """
    A "data" range overlapping the beginning of a "code" segment takes over the overlapped part.
    """

    segments = SegmentList()
    segments.occupy(10, 20, "code")
    segments.occupy(9, 2, "data")

    check = nose.tools.assert_equal
    check(len(segments), 2)

    # expected (start, end, sort) of each segment, in list order
    expected = (
        (9, 11, 'data'),
        (11, 30, 'code'),
    )
    for index, (start, end, sort) in enumerate(expected):
        check(segments._list[index].start, start)
        check(segments._list[index].end, end)
        check(segments._list[index].sort, sort)
496
497
498#
499# Serialization
500#
501
def test_serialization_cfgnode():
    """
    Serialize the CFG node at the entry point of x86_64 fauxware and parse it back. The parsed node must have the
    same address, size and block ID as the original.
    """

    proj = angr.Project(os.path.join(test_location, "x86_64", "fauxware"), auto_load_libs=False)
    cfg = proj.analyses.CFGFast()

    # the first node
    original = cfg.model.get_any_node(proj.entry)
    nose.tools.assert_is_not_none(original)

    blob = original.serialize()
    nose.tools.assert_greater(len(blob), 0)

    restored = CFGNode.parse(blob)
    for field in ("addr", "size", "block_id"):
        nose.tools.assert_equal(getattr(restored, field), getattr(original, field))
517
518
def test_serialization_cfgfast():
    """
    Serialize the CFG model of one project and parse it into the CFG manager of a second project loaded from the
    same binary. Node count, edge count and the entry node must match.
    """

    binary = os.path.join(test_location, "x86_64", "fauxware")
    source_proj = angr.Project(binary, auto_load_libs=False)
    target_proj = angr.Project(binary, auto_load_libs=False)

    cfg = source_proj.analyses.CFGFast()

    # serialize the entire graph
    blob = cfg.model.serialize()
    nose.tools.assert_greater(len(blob), 0)

    # simulate importing a cfg from another tool
    parsed_model = CFGModel.parse(blob, cfg_manager=target_proj.kb.cfgs)

    nose.tools.assert_equal(len(parsed_model.graph.nodes), len(cfg.graph.nodes))
    nose.tools.assert_equal(len(parsed_model.graph.edges), len(cfg.graph.edges))

    entry = source_proj.entry
    nose.tools.assert_equal(cfg.model.get_any_node(entry), parsed_model.get_any_node(entry))
538
539
540#
541# CFG instance copy
542#
543
def test_cfg_copy():
    """
    Copy a CFGFast instance. Every attribute except the graph, segment list and model must compare equal, while
    the model, its graph and the segment list must be distinct objects in the copy.
    """

    proj = angr.Project(os.path.join(test_location, "cgc", "CADET_00002"))

    cfg = proj.analyses.CFGFast()
    cfg_copy = cfg.copy()

    # these attributes are excluded from the equality comparison
    excluded = {'_graph', '_seg_list', '_model'}
    for name in cfg_copy.__dict__:
        if name not in excluded:
            nose.tools.assert_equal(getattr(cfg, name), getattr(cfg_copy, name))

    # the copy must not share these objects with the original
    nose.tools.assert_not_equal(id(cfg.model), id(cfg_copy.model))
    nose.tools.assert_not_equal(id(cfg.model.graph), id(cfg_copy.model.graph))
    nose.tools.assert_not_equal(id(cfg._seg_list), id(cfg_copy._seg_list))
558
559#
560# Alignment bytes
561#
562
def test_cfg_0_pe_msvc_debug_nocc():
    """
    Check alignment-byte handling on the x86_64 Windows binary `msvc_cfg_0_debug.exe`: an address must be marked
    as "alignment" in the segment list, and no function may be created at an alignment address.
    """

    binary = os.path.join(test_location, 'x86_64', 'windows', 'msvc_cfg_0_debug.exe')
    proj = angr.Project(binary, auto_load_libs=False)
    cfg = proj.analyses.CFGFast()

    # make sure 0x140016583 is marked as alignments
    # NOTE(review): the address queried here (0x140016583) differs from the one checked against the function
    # list below (0x140015683) - confirm whether the two addresses are intentionally different.
    sort = cfg._seg_list.occupied_by_sort(0x140016583)
    failure_msg = "Address 0x140016583 is not marked as alignment. The CC detection is probably failing."
    nose.tools.assert_equal(sort, "alignment", failure_msg)

    nose.tools.assert_not_in(0x140015683, cfg.kb.functions)
574
575#
576# Indirect jump resolvers
577#
578
579# For test cases for jump table resolver, please refer to test_jumptables.py
580
def test_resolve_x86_elf_pic_plt():
    """
    On the i386 `fauxware_pie` binary, the node at 0x4005b0 (puts) must have a single successor at the address of
    the `puts` symbol, and that successor must lead to the three expected return targets.
    """

    proj = angr.Project(os.path.join(test_location, 'i386', 'fauxware_pie'), load_options={'auto_load_libs': False})
    cfg = proj.analyses.CFGFast()

    # puts
    puts_node = cfg.model.get_any_node(0x4005b0)
    nose.tools.assert_is_not_none(puts_node)

    # there should be only one successor, which jumps to SimProcedure puts
    nose.tools.assert_equal(len(puts_node.successors), 1)
    puts_successor = puts_node.successors[0]
    puts_symbol_addr = proj.loader.find_symbol('puts').rebased_addr
    nose.tools.assert_equal(puts_successor.addr, puts_symbol_addr)

    # the SimProcedure puts should have three successors, which are all return targets
    nose.tools.assert_equal(len(puts_successor.successors), 3)
    return_targets = {node.addr for node in puts_successor.successors}
    nose.tools.assert_equal(return_targets, {0x400800, 0x40087e, 0x4008b6})
601
602#
603# Function names
604#
605
def test_function_names_for_unloaded_libraries():
    """
    With `auto_load_libs` disabled, the listed library functions must still appear by name in the CFG of i386
    `fauxware_pie`, both as PLT functions and as non-PLT functions.
    """

    proj = angr.Project(os.path.join(test_location, 'i386', 'fauxware_pie'), load_options={'auto_load_libs': False})
    cfg = proj.analyses.CFGFast()

    # PLT functions get a "plt_" prefix so that they can be told apart from non-PLT functions of the same name
    function_names = []
    for func in cfg.functions.values():
        function_names.append('plt_' + func.name if func.is_plt else func.name)

    expected_names = (
        'plt_puts',
        'plt_read',
        'plt___stack_chk_fail',
        'plt_exit',
        'puts',
        'read',
        '__stack_chk_fail',
        'exit',
    )
    for name in expected_names:
        nose.tools.assert_in(name, function_names)
622
623#
624# Basic blocks
625#
626
def test_block_instruction_addresses_armhf():
    """
    For armhf fauxware, the first block of `main` and the CFG node at the address of `main` must both have 12
    instruction addresses, all of them odd.
    """

    proj = angr.Project(os.path.join(test_location, 'armhf', 'fauxware'), auto_load_libs=False)
    cfg = proj.analyses.CFGFast()
    main_func = cfg.kb.functions['main']

    def check_instruction_addrs(obj):
        # 12 instructions are expected, and all instruction addresses must be odd
        nose.tools.assert_equal(len(obj.instruction_addrs), 12)
        for instr_addr in obj.instruction_addrs:
            nose.tools.assert_true(instr_addr % 2 == 1)

    # the block of main that starts at the function address
    first_block = next(blk for blk in main_func.blocks if blk.addr == main_func.addr)
    check_instruction_addrs(first_block)

    # the CFG node at the same address
    main_node = cfg.model.get_any_node(main_func.addr)
    nose.tools.assert_is_not_none(main_node)
    check_instruction_addrs(main_node)
647
648#
649# Tail-call optimization detection
650#
651
def test_tail_call_optimization_detection_armel():
    """
    Test tail-call detection on an armel binary (GitHub issue #1286).

    With `detect_tail_calls=True`, addresses that lie inside existing functions must not become functions, the
    targets of tail calls must be functions, and each listed returning block must have exactly the expected
    return addresses as its successors.
    """

    # GitHub issue #1286

    path = os.path.join(test_location, 'armel', 'Nucleo_read_hyperterminal-stripped.elf')
    proj = angr.Project(path, auto_load_libs=False)

    cfg = proj.analyses.CFGFast(resolve_indirect_jumps=True,
                                force_complete_scan=False,
                                normalize=True,
                                symbols=False,
                                detect_tail_calls=True
                                )

    all_func_addrs = set(cfg.functions.keys())
    nose.tools.assert_not_in(0x80010b5, all_func_addrs, "0x80010b5 is inside Reset_Handler().")
    nose.tools.assert_not_in(0x8003ef9, all_func_addrs, "0x8003ef9 is inside memcpy().")
    nose.tools.assert_not_in(0x8008419, all_func_addrs, "0x8008419 is inside __mulsf3().")

    # Functions that are jumped to from tail-calls
    tail_call_funcs = [ 0x8002bc1, 0x80046c1, 0x8000281, 0x8001bdb, 0x8002839, 0x80037ad, 0x8002c09, 0x8004165,
                        0x8004be1, 0x8002eb1 ]
    for member in tail_call_funcs:
        nose.tools.assert_in(member, all_func_addrs)

    # also test for tailcall return addresses

    # mapping of return blocks to return addrs that are the actual callers of certain tail-calls endpoints
    tail_call_return_addrs = {0x8002bd9: [0x800275f],   # 0x8002bc1
                              0x80046d7: [0x800275f],   # 0x80046c1
                              0x80046ed: [0x800275f],   # 0x80046c1
                              0x8001be7: [0x800068d, 0x8000695],   # 0x8001bdb ??
                              0x800284d: [0x800028b, 0x80006e1, 0x80006e7],   # 0x8002839
                              0x80037f5: [0x800270b, 0x8002733, 0x8002759, 0x800098f, 0x8000997], # 0x80037ad
                              0x80037ef: [0x800270b, 0x8002733, 0x8002759, 0x800098f, 0x8000997], # 0x80037ad
                              0x8002cc9: [0x8002d3b, 0x8002b99, 0x8002e9f, 0x80041ad,
                                          0x8004c87, 0x8004d35, 0x8002efb, 0x8002be9,
                                          0x80046eb, 0x800464f, 0x8002a09, 0x800325f,
                                          0x80047c1],    # 0x8002c09
                              0x8004183: [0x8002713],    # 0x8004165
                              0x8004c31: [0x8002713],    # 0x8004be1
                              0x8004c69: [0x8002713],    # 0x8004be1
                              0x8002ef1: [0x800273b]}    # 0x8002eb1

    # check all expected return addrs are present
    for returning_block_addr, expected_return_addrs in tail_call_return_addrs.items():
        returning_block = cfg.model.get_any_node(returning_block_addr)
        # fail with a clear message if the returning block was not recovered at all, instead of passing None on to
        # get_successors()
        nose.tools.assert_is_not_none(returning_block,
                                      "CFG node 0x%x is not found." % returning_block_addr)

        return_block_addrs = [rb.addr for rb in cfg.model.get_successors(returning_block)]
        msg = "%x: unequal sizes of expected_addrs [%d] and return_block_addrs [%d]" % \
                            (returning_block_addr, len(expected_return_addrs), len(return_block_addrs))
        nose.tools.assert_equal(len(return_block_addrs), len(expected_return_addrs), msg)

        for expected_addr in expected_return_addrs:
            msg = "expected retaddr %x not found for returning_block %x" % \
                                    (expected_addr, returning_block_addr)
            nose.tools.assert_in(expected_addr, return_block_addrs, msg)
707
708#
709# Incorrect function-leading blocks merging
710#
711
def test_function_leading_blocks_merging():
    """
    Test that function-leading blocks are merged correctly (GitHub issue #1312).

    Function 0x8000799 must exist and contain a block of size 6 at its own address, while 0x800079b must be
    neither a function nor a block of function 0x8000799.
    """

    # GitHub issue #1312

    path = os.path.join(test_location, 'armel', 'Nucleo_read_hyperterminal-stripped.elf')
    proj = angr.Project(path, arch=archinfo.ArchARMCortexM(), auto_load_libs=False)

    cfg = proj.analyses.CFGFast(resolve_indirect_jumps=True,
                                force_complete_scan=True,
                                normalize=True,
                                symbols=False,
                                detect_tail_calls=True
                                )

    func_addr = 0x8000799
    bogus_addr = 0x800079b  # must not show up as a function or as a block

    nose.tools.assert_in(func_addr, cfg.kb.functions, "Function 0x8000799 does not exist.")
    nose.tools.assert_not_in(bogus_addr, cfg.kb.functions, "Function 0x800079b is found, but it should not exist.")

    func = cfg.kb.functions[func_addr]
    nose.tools.assert_not_in(bogus_addr, func.block_addrs_set,
                             "Block 0x800079b is found, but it should not exist.")
    nose.tools.assert_in(func_addr, func.block_addrs_set,
                         "Block 0x8000799 is not found inside function 0x8000799.")

    # the leading block of the function
    leading_block = next(b for b in func.blocks if b.addr == func_addr)
    nose.tools.assert_equal(leading_block.size, 6, "Block 0x8000799 has an incorrect size.")
734
735
736#
737# Blanket
738#
739
def test_blanket_fauxware():
    """
    Build a CFBlanket from the knowledge base of a CFG of x86_64 fauxware, and check `floor_addr()` and
    `ceiling_addr()`.
    """

    proj = angr.Project(os.path.join(test_location, 'x86_64', 'fauxware'), auto_load_libs=False)
    cfg = proj.analyses.CFGFast()
    cfb = proj.analyses.CFBlanket(kb=cfg.kb)

    # it should raise a key error when calling floor_addr on address 0 because nothing is mapped there
    with nose.tools.assert_raises(KeyError):
        cfb.floor_addr(0)

    probe_addr = 0x400581
    # an instruction (or a block) starts at 0x400580
    nose.tools.assert_equal(cfb.floor_addr(probe_addr), 0x400580)
    # a block ends at 0x4005a9 (exclusive)
    nose.tools.assert_equal(cfb.ceiling_addr(probe_addr), 0x4005a9)
755
756
757#
758# Data references
759#
760
def test_data_references_x86_64():
    """
    Build a CFG of x86_64 fauxware with `data_references=True` and check the collected memory data: the number of
    pointer arrays, and the "SOSNEAKY" string at 0x4008d0.
    """

    proj = angr.Project(os.path.join(test_location, 'x86_64', 'fauxware'), auto_load_libs=False)
    cfg = proj.analyses.CFGFast(data_references=True)
    memory_data = cfg.memory_data

    def count_by_sort(sort):
        # number of memory data entries of the given sort
        return sum(1 for item in memory_data.values() if item.sort == sort)

    # There is no code reference
    # NOTE(review): a count is always >= 0, so this assertion cannot fail. The message suggests that "== 0" was
    # intended - confirm before tightening it.
    code_ref_count = count_by_sort(MemoryDataSort.CodeReference)
    nose.tools.assert_greater_equal(code_ref_count, 0, msg="There should be no code reference.")

    # There are more than 2 pointer arrays
    ptr_array_count = count_by_sort(MemoryDataSort.PointerArray)
    nose.tools.assert_greater(ptr_array_count, 2, msg="Missing some pointer arrays.")

    sneaky_addr = 0x4008d0
    nose.tools.assert_in(sneaky_addr, memory_data)
    sneaky_str = memory_data[sneaky_addr]
    nose.tools.assert_equal(sneaky_str.sort, "string")
    nose.tools.assert_equal(sneaky_str.content, b"SOSNEAKY")
781
782
def test_data_references_mipsel():
    """
    Build a CFG of mipsel fauxware with `data_references=True` and check the collected memory data: the number of
    pointer arrays, and the strings expected at known addresses.
    """

    proj = angr.Project(os.path.join(test_location, 'mipsel', 'fauxware'), auto_load_libs=False)
    cfg = proj.analyses.CFGFast(data_references=True)
    memory_data = cfg.memory_data

    def count_by_sort(sort):
        # number of memory data entries of the given sort
        return sum(1 for item in memory_data.values() if item.sort == sort)

    # There is no code reference
    # NOTE(review): a count is always >= 0, so this assertion cannot fail. The message suggests that "== 0" was
    # intended - confirm before tightening it.
    code_ref_count = count_by_sort(MemoryDataSort.CodeReference)
    nose.tools.assert_greater_equal(code_ref_count, 0, msg="There should be no code reference.")

    # There is at least 1 pointer array
    ptr_array_count = count_by_sort(MemoryDataSort.PointerArray)
    nose.tools.assert_greater_equal(ptr_array_count, 1, msg="Missing some pointer arrays.")

    # expected strings as (address, content) pairs
    expected_strings = (
        (0x400c00, b"SOSNEAKY"),
        (0x400c0c, b"Welcome to the admin console, trusted user!"),
        (0x400c38, b"Go away!"),
        (0x400c44, b"Username: "),
        (0x400c50, b"Password: "),
    )
    for addr, content in expected_strings:
        nose.tools.assert_in(addr, memory_data)
        str_ = memory_data[addr]
        nose.tools.assert_equal(str_.sort, "string")
        nose.tools.assert_equal(str_.content, content)
823
824
825#
826# CFG with patches
827#
828
def test_cfg_with_patches():
    """
    Generate CFGs of x86_64 fauxware on knowledge bases that carry patches, with `use_patches=True`.

    Patching the byte 0xc3 (the inline comments call it a ret patch) over the first or the second instruction of
    authenticate() must cut the function down to a single block with one or two instructions respectively. A
    knowledge base without patches must still give the normal function with 10 blocks.
    """

    proj = angr.Project(os.path.join(test_location, 'x86_64', 'fauxware'), auto_load_libs=False)

    cfg = proj.analyses.CFGFast()
    auth_func = cfg.functions['authenticate']
    auth_func_addr = auth_func.addr

    def check_single_block(func, expected_instructions):
        # the function must consist of one block with the given number of instructions
        nose.tools.assert_equal(len(func.block_addrs_set), 1)
        first_block = func._get_block(auth_func_addr)
        nose.tools.assert_equal(len(first_block.instruction_addrs), expected_instructions)

    # Take the authenticate function and add a retn patch for its very first block
    first_kb = angr.KnowledgeBase(proj)
    first_kb.patches.add_patch(auth_func_addr, b"\xc3")

    # with this patch, there should only be one block with one instruction in authenticate()
    proj.analyses.CFGFast(kb=first_kb, use_patches=True)
    check_single_block(first_kb.functions['authenticate'], 1)

    # let's try to patch the second instruction of that function to ret
    second_kb = angr.KnowledgeBase(proj)
    second_insn_addr = auth_func._get_block(auth_func_addr).instruction_addrs[1]
    second_kb.patches.add_patch(second_insn_addr, b"\xc3")

    # with this patch, there should only be one block with two instructions in authenticate()
    proj.analyses.CFGFast(kb=second_kb, use_patches=True)
    check_single_block(second_kb.functions['authenticate'], 2)

    # finally, if we generate a new CFG on a KB without any patch, we should still see the normal function (with 10
    # blocks)
    clean_kb = angr.KnowledgeBase(proj)
    proj.analyses.CFGFast(kb=clean_kb, use_patches=True)
    nose.tools.assert_equal(len(clean_kb.functions['authenticate'].block_addrs_set), 10)
866
867
def test_unresolvable_targets():
    """
    Check the endpoints recovered for function 0x080489E0 of the CGC binary CADET_00002.

    A normalized fast CFG is generated and the addresses of the function's endpoints must match the expected set
    exactly. (Judging by the test name, this function is expected to contain unresolvable jump targets.)

    :return: None
    """

    path = os.path.join(test_location, 'cgc', 'CADET_00002')
    proj = angr.Project(path)

    proj.analyses.CFGFast(normalize=True)
    func = proj.kb.functions[0x080489E0]

    true_endpoint_addrs = {0x8048bbc, 0x8048af5, 0x8048b5c, 0x8048a41, 0x8048aa8}
    endpoint_addrs = {node.addr for node in func.endpoints}
    # Compare the two sets directly: on failure the report then lists the missing and the unexpected addresses
    # instead of merely stating that the size of the symmetric difference is not 0.
    nose.tools.assert_equal(endpoint_addrs, true_endpoint_addrs)
879
880
def test_indirect_jump_to_outside():
    """
    Check the function at 0x404ee4 of the mipsel build of libndpi.so.4.0.0.

    The function must consist of three blocks and have its endpoints at 0x404f00 and 0x404f08.

    :return: None
    """

    # an indirect jump might be jumping to outside as well
    binary = os.path.join(test_location, "mipsel", "libndpi.so.4.0.0")
    proj = angr.Project(binary, auto_load_libs=False)

    func = proj.analyses.CFGFast().functions[0x404ee4]

    block_count = sum(1 for _ in func.blocks)
    nose.tools.assert_equal(block_count, 3)

    endpoint_addrs = {endpoint.addr for endpoint in func.endpoints}
    nose.tools.assert_equal(endpoint_addrs, {0x404f00, 0x404f08})
891
892
def test_plt_stub_has_one_jumpout_site():
    """
    Check that every function flagged as a PLT stub in x86_64/1after909 has exactly one jumpout site.

    :return: None
    """

    # each PLT stub must have exactly one jumpout site
    path = os.path.join(test_location, "x86_64", "1after909")
    proj = angr.Project(path, auto_load_libs=False)
    cfg = proj.analyses.CFGFast()

    for func in cfg.kb.functions.values():
        if func.is_plt:
            # Use nose.tools like the rest of this module: a bare `assert` disappears under `python -O` and does
            # not tell which stub is the offending one.
            nose.tools.assert_equal(len(func.jumpout_sites), 1,
                                    msg="PLT stub %r must have exactly one jumpout site." % (func,))
903
904
def test_generate_special_info():
    """
    Check the `info` dictionaries that CFGFast attaches to functions of the mipsel fauxware binary.

    At least one function must carry a non-empty `info`, and the 'gp' entry of `main` must equal 0x418ca0.

    :return: None
    """

    binary = os.path.join(test_location, "mipsel", "fauxware")
    proj = angr.Project(binary, auto_load_libs=False)

    functions = proj.analyses.CFGFast().functions

    some_function_has_info = any(func.info for func in functions.values())
    nose.tools.assert_true(some_function_has_info)

    main_info = functions['main'].info
    nose.tools.assert_equal(main_info['gp'], 0x418ca0)
914
915
def test_load_from_shellcode():
    """
    Build a fast CFG from a small piece of x86 assembly loaded as shellcode and check that the CFG model holds
    exactly two nodes.

    :return: None
    """

    assembly = 'loop: dec ecx; jnz loop; ret'
    proj = angr.load_shellcode(assembly, 'x86')

    model = proj.analyses.CFGFast().model
    node_count = len(model.nodes())

    nose.tools.assert_equal(node_count, 2)
922
923
def run_all():
    """
    Run the test cases of this module one after another, without the help of a test runner.

    SegmentList tests are discovered by their `test_segment_list_` name prefix; all other tests are listed
    explicitly. Generator-style tests yield tuples of the form (check_function, arg0, arg1, ...), which are unpacked
    and called here.

    :return: None
    """

    segmentlist_tests = [v for k, v in globals().items() if k.startswith("test_segment_list_") and callable(v)]

    for func in segmentlist_tests:
        print(func.__name__)
        func()

    test_serialization_cfgnode()
    test_serialization_cfgfast()

    # Generator-style tests whose check function name is printed before each check
    for generator_test in (test_cfg_0, test_cfg_0_pe, test_fauxware, test_cfg_loop_unrolling):
        for args in generator_test():
            print(args[0].__name__)
            args[0](*args[1:])

    # The switch tests are run without printing the name of the check function
    for args in test_cfg_switches():
        args[0](*args[1:])

    test_resolve_x86_elf_pic_plt()
    test_function_names_for_unloaded_libraries()
    test_block_instruction_addresses_armhf()
    test_tail_call_optimization_detection_armel()
    test_blanket_fauxware()
    test_data_references_x86_64()
    test_data_references_mipsel()
    test_function_leading_blocks_merging()
    test_cfg_with_patches()
    # This test used to be missing from the list, so a manual run silently skipped it
    test_unresolvable_targets()
    test_indirect_jump_to_outside()
    test_generate_special_info()
    test_plt_stub_has_one_jumpout_site()
    test_load_from_shellcode()
968
969
def main():
    """
    Command-line entry point of this test module.

    Without arguments, all tests are run through run_all(). Otherwise the first argument is the name of a single
    test without its `test_` prefix (e.g. `cfg_with_patches`). If that test returns an iterable (generator-style
    test), each item is treated as (check_function, arg0, arg1, ...) and the check function is called with the
    remaining items as arguments.

    :return: None
    :raises SystemExit: if the requested name does not refer to a callable test in this module
    """
    if len(sys.argv) > 1:
        test_name = 'test_' + sys.argv[1]
        test = globals().get(test_name)

        # Some module globals share the `test_` prefix without being tests (e.g. `test_location`), so a missing
        # name and a non-callable object are both reported as an unknown test instead of raising KeyError/TypeError.
        if not callable(test):
            sys.exit("Unknown test %r." % test_name)

        r = test()

        if r is not None:
            for func_and_args in r:
                func, args = func_and_args[0], func_and_args[1:]
                func(*args)
    else:
        run_all()
982
# Script entry point: only runs when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    # Uncomment the next line to get debug-level logging from the CFGFast analysis while running the tests:
    # logging.getLogger('angr.analyses.cfg.cfg_fast').setLevel(logging.DEBUG)
    main()
986