import os
import logging
import sys

from nose.plugins.attrib import attr
import nose.tools

import archinfo
import angr

from angr.analyses.cfg.cfg_fast import SegmentList
from angr.knowledge_plugins.cfg import CFGNode, CFGModel, MemoryDataSort

l = logging.getLogger("angr.tests.test_cfgfast")

# Root directory of the test binaries, resolved relative to this file.
test_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'binaries', 'tests')


def cfg_fast_functions_check(arch, binary_path, func_addrs, func_features):
    """
    Generate fast CFGs on the given binary, and test if all specified functions are found.

    The same checks are run against three CFGFast configurations, in this order: default options,
    `force_segment=True`, and `force_segment=True` together with `normalize=True`.

    :param str arch:            the architecture, will be prepended to `binary_path`
    :param str binary_path:     path to the binary under the architecture directory
    :param func_addrs:          A collection (callers in this file pass sets) of function addresses that
                                should be recovered
    :param dict func_features:  Maps a function address to a dict of expected features. Only the
                                "returning" feature is checked.
    :return: None
    """

    path = os.path.join(test_location, arch, binary_path)
    proj = angr.Project(path, load_options={'auto_load_libs': False})

    cfg_options = (
        {},                                             # default
        {'force_segment': True},                        # segment only
        {'force_segment': True, 'normalize': True},     # with normalization enabled
    )

    for options in cfg_options:
        cfg = proj.analyses.CFGFast(**options)

        # Report exactly which functions are missing instead of a bare "False is not true".
        missing = set(func_addrs) - set(cfg.kb.functions.keys())
        nose.tools.assert_false(
            missing,
            msg="CFGFast(%s) did not recover functions: %s" % (
                options, ", ".join("%#x" % addr for addr in sorted(missing))),
        )

        for func_addr, feature_dict in func_features.items():
            # "undefined" is the sentinel for "no expectation"; None/False/True are all meaningful values.
            returning = feature_dict.get("returning", "undefined")
            if returning != "undefined":
                nose.tools.assert_is(cfg.kb.functions.function(addr=func_addr).returning, returning)


def cfg_fast_edges_check(arch, binary_path, edges):
    """
    Generate a fast CFG on the given binary, and test if all edges are found.

    :param str arch:            the architecture, will be prepended to `binary_path`
    :param str binary_path:     path to the binary under the architecture directory
    :param edges:               an iterable of (source address, destination address) pairs
    :return: None
    """

    path = os.path.join(test_location, arch, binary_path)
    proj = angr.Project(path, load_options={'auto_load_libs': False})

    cfg = proj.analyses.CFGFast()

    for src, dst in edges:
        src_node = cfg.model.get_any_node(src)
        dst_node = cfg.model.get_any_node(dst)
        nose.tools.assert_is_not_none(src_node, msg="CFG node 0x%x is not found." % src)
        nose.tools.assert_is_not_none(dst_node, msg="CFG node 0x%x is not found." % dst)
        nose.tools.assert_in(dst_node, src_node.successors,
                             msg="CFG edge %s-%s is not found." % (src_node, dst_node)
                             )


def test_cfg_0():
    """Yield one function-recovery check per architecture for the `cfg_0` binary."""
    filename = 'cfg_0'
    functions = {
        'x86_64': {
            0x400410,
            0x400420,
            0x400430,
            0x400440,
            0x400470,
            0x40052c,
            0x40053c,
        }
    }

    function_features = {
        'x86_64': {}
    }

    for arch, func_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, func_addrs, function_features[arch]


def test_cfg_0_pe():
    """Yield one function-recovery check per architecture for the `cfg_0_pe` binary."""
    filename = 'cfg_0_pe'
    functions = {
        'x86_64': {
            # 0x40150a,  # currently angr identifies 0x40150e due to the way _func_addrs_from_prologues() is
                         # implemented. this issue can be resolved with a properly implemented approach like
                         # Byte-Weight
            0x4014f0,
        }
    }

    function_features = {
        'x86_64': {}
    }

    for arch, func_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, func_addrs, function_features[arch]


@attr(speed='slow')
def test_busybox():
    """Yield one edge check per architecture for the `busybox` binary (tagged as slow)."""
    filename = "busybox"
    edges = {
        "mipsel": {
            (0x4091ec, 0x408de0),
            (0x449acc, 0x5003b8),  # call to putenv. address of putenv may change in the future
            (0x467cfc, 0x500014),  # call to free. address of free may change in the future
        }
    }

    for arch, edges_ in edges.items():
        yield cfg_fast_edges_check, arch, filename, edges_


def test_fauxware():
    """Yield a function-recovery check followed by a return-edge check for each `fauxware` architecture."""
    filename = "fauxware"
    functions = {
        'x86_64': {
            0x4004e0,
            0x400510,
            0x400520,
            0x400530,
            0x400540,
            0x400550,
            0x400560,
            0x400570,  # .plt._exit
            0x400580,  # _start
            0x4005ac,
            0x4005d0,
            0x400640,
            0x400664,
            0x4006ed,
            0x4006fd,
            0x40071d,  # main
            0x4007e0,
            0x400870,
            0x400880,
            0x4008b8,
        },
        'mips': {
            0x400534,  # _init
            0x400574,
            0x400598,
            0x4005d0,  # _ftext
            0x4005dc,
            0x400630,  # __do_global_dtors_aux
            0x4006d4,  # frame_dummy
            0x400708,
            0x400710,  # authenticate
            0x400814,  # accepted
            0x400868,  # rejected
            0x4008c0,  # main
            0x400a34,
            0x400a48,  # __libc_csu_init
            0x400af8,
            0x400b00,  # __do_global_ctors_aux
            0x400b58,
            ### plt entries
            0x400b60,  # strcmp
            0x400b70,  # read
            0x400b80,  # printf
            0x400b90,  # puts
            0x400ba0,  # exit
            0x400bb0,  # open
            0x400bc0,  # __libc_start_main
        },
    }

    function_features = {
        'x86_64':
            {
                0x400570:  # plt.exit
                    {
                        "returning": False
                    },
                0x4006fd:  # rejected
                    {
                        "returning": False
                    }
            },
        'mips':
            {
                0x400868:  # rejected
                    {
                        "returning": False,
                    }
            },
    }

    return_edges = {
        'x86_64':
            [
                (0x4006fb, 0x4007c7)  # return from accepted to main
            ],
        'mips':
            [
                (0x40084c, 0x400a04)  # returning edge from accepted to main
            ],
    }

    for arch, func_addrs in functions.items():
        yield cfg_fast_functions_check, arch, filename, func_addrs, function_features[arch]
        yield cfg_fast_edges_check, arch, filename, return_edges[arch]


def test_cfg_loop_unrolling():
    """Yield one edge check per architecture for the `cfg_loop_unrolling` binary."""
    filename = "cfg_loop_unrolling"
    edges = {
        'x86_64': {
            (0x400658, 0x400636),
            (0x400658, 0x400661),
            (0x400651, 0x400636),
            (0x400651, 0x400661),
        }
    }

    for arch, edges_ in edges.items():
        yield cfg_fast_edges_check, arch, filename, edges_


def test_cfg_switches():
    """Yield one edge check per architecture for the jump-table edges of the `cfg_switches` binary."""

    #logging.getLogger('angr.analyses.cfg.cfg_fast').setLevel(logging.INFO)
    #logging.getLogger('angr.analyses.cfg.indirect_jump_resolvers.jumptable').setLevel(logging.DEBUG)

    filename = "cfg_switches"

    edges = {
        'x86_64': {
            # jump table 0 in func_0
            (0x40053a, 0x400547),
            (0x40053a, 0x400552),
            (0x40053a, 0x40055d),
            (0x40053a, 0x400568),
            (0x40053a, 0x400573),
            (0x40053a, 0x400580),
            (0x40053a, 0x40058d),
            # jump table 0 in func_1
            (0x4005bc, 0x4005c9),
            (0x4005bc, 0x4005d8),
            (0x4005bc, 0x4005e7),
            (0x4005bc, 0x4005f6),
            (0x4005bc, 0x400605),
            (0x4005bc, 0x400614),
            (0x4005bc, 0x400623),
            (0x4005bc, 0x400632),
            (0x4005bc, 0x40063e),
            (0x4005bc, 0x40064a),
            (0x4005bc, 0x4006b0),
            # jump table 1 in func_1
            (0x40065a, 0x400667),
            (0x40065a, 0x400673),
            (0x40065a, 0x40067f),
            (0x40065a, 0x40068b),
            (0x40065a, 0x400697),
            (0x40065a, 0x4006a3),
            # jump table 0 in main
            (0x4006e1, 0x4006ee),
            (0x4006e1, 0x4006fa),
            (0x4006e1, 0x40070b),
            (0x4006e1, 0x40071c),
            (0x4006e1, 0x40072d),
            (0x4006e1, 0x40073e),
            (0x4006e1, 0x40074f),
            (0x4006e1, 0x40075b),
        },
        'armel': {
            # jump table 0 in func_0
            (0x10434, 0x10488),
            (0x10434, 0x104e8),
            (0x10434, 0x10498),
            (0x10434, 0x104a8),
            (0x10434, 0x104b8),
            (0x10434, 0x104c8),
            (0x10434, 0x104d8),
            (0x10454, 0x104e8),  # default case
            # jump table 0 in func_1
            (0x10524, 0x105cc),
            (0x10524, 0x106b4),
            (0x10524, 0x105d8),
            (0x10524, 0x105e4),
            (0x10524, 0x105f0),
            (0x10524, 0x105fc),
            (0x10524, 0x10608),
            (0x10524, 0x10614),
            (0x10524, 0x10620),
            (0x10524, 0x1062c),
            (0x10524, 0x10638),
            (0x10534, 0x106b4),  # default case
            # jump table 1 in func_1
            (0x10650, 0x106a4),  # default case
            (0x10640, 0x10668),
            (0x10640, 0x10674),
            (0x10640, 0x10680),
            (0x10640, 0x1068c),
            (0x10640, 0x10698),
            # jump table 0 in main
            (0x10734, 0x107fc),
            (0x10734, 0x10808),
            (0x10734, 0x10818),
            (0x10734, 0x10828),
            (0x10734, 0x10838),
            (0x10734, 0x10848),
            (0x10734, 0x10858),
            (0x10734, 0x10864),
            (0x10744, 0x10864),  # default case
        },
        's390x': {
            # jump table 0 in func_0
            (0x4007d4, 0x4007ea),  # case 1
            (0x4007d4, 0x4007f4),  # case 3
            (0x4007d4, 0x4007fe),  # case 5
            (0x4007d4, 0x400808),  # case 7
            (0x4007d4, 0x400812),  # case 9
            (0x4007d4, 0x40081c),  # case 12
            (0x4007c0, 0x4007ca),  # default case
            # jump table 0 in func_1
            (0x400872, 0x4008ae),  # case 2
            (0x400872, 0x4008be),  # case 10
            (0x400872, 0x4008ce),  # case 12
            (0x400872, 0x4008de),  # case 14
            (0x400872, 0x4008ee),  # case 15
            (0x400872, 0x4008fe),  # case 16
            (0x400872, 0x40090e),  # case 22
            (0x400872, 0x40091e),  # case 24
            (0x400872, 0x40092e),  # case 28
            (0x400872, 0x400888),  # case 38
            (0x400848, 0x400854),  # default case (1)
            (0x400872, 0x400854),  # default case (2)
            # jump table 1 in func_1
            (0x40093e, 0x400984),  # case 1
            (0x40093e, 0x400974),  # case 2
            (0x40093e, 0x400964),  # case 3
            (0x40093e, 0x400954),  # case 4
            (0x40093e, 0x400994),  # case 5
            (0x400898, 0x40089e),  # default case (1)
            # jump table 0 in main
            # case 1, 3, 5, 7, 9: optimized out
            (0x400638, 0x40064e),  # case 2
            (0x400638, 0x400692),  # case 4
            (0x400638, 0x4006a4),  # case 6
            (0x400638, 0x40066e),  # case 8
            (0x400638, 0x400680),  # case 10
            # case 45: optimized out
            (0x40062c, 0x40065c),  # default case
        }
    }

    for arch, edges_ in edges.items():
        yield cfg_fast_edges_check, arch, filename, edges_


def test_cfg_about_time():
    """Check that PLT stubs are removed from the function list while the entry function is kept."""

    # This is to test the correctness of the PLT stub removal in CFGBase
    proj = angr.Project(os.path.join(test_location, "x86_64", "about_time"), auto_load_libs=False)
    cfg = proj.analyses.CFG()

    # PLT stubs that should be removed
    for plt_stub_addr in (0x401026, 0x4010a6, 0x40115e):
        nose.tools.assert_not_in(plt_stub_addr, cfg.kb.functions)
    # the start function that should not be removed
    nose.tools.assert_in(proj.entry, cfg.kb.functions)


def test_segment_list_0():
    """Two non-adjacent segments of the same sort stay separate."""
    seg_list = SegmentList()
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(2, 3, "code")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 5)
    nose.tools.assert_equal(seg_list.is_occupied(4), True)
    nose.tools.assert_equal(seg_list.is_occupied(5), False)


def test_segment_list_1():
    """Two adjacent segments of the same sort are merged."""
    seg_list = SegmentList()

    # They should be merged
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(1, 2, "code")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 3)


def test_segment_list_2():
    """Two adjacent segments of different sorts are not merged."""
    seg_list = SegmentList()

    # They should not be merged
    seg_list.occupy(0, 1, "code")
    seg_list.occupy(1, 2, "data")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[1].start, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 3)


def test_segment_list_3():
    """Occupying a range of another sort inside a merged segment splits it into three segments."""
    seg_list = SegmentList()

    # They should be merged, and create three different segments
    seg_list.occupy(0, 5, "code")
    seg_list.occupy(5, 5, "code")
    seg_list.occupy(1, 2, "data")

    nose.tools.assert_equal(len(seg_list), 3)

    nose.tools.assert_equal(seg_list._list[0].start, 0)
    nose.tools.assert_equal(seg_list._list[0].end, 1)
    nose.tools.assert_equal(seg_list._list[0].sort, "code")

    nose.tools.assert_equal(seg_list._list[1].start, 1)
    nose.tools.assert_equal(seg_list._list[1].end, 3)
    nose.tools.assert_equal(seg_list._list[1].sort, "data")

    nose.tools.assert_equal(seg_list._list[2].start, 3)
    nose.tools.assert_equal(seg_list._list[2].end, 10)
    nose.tools.assert_equal(seg_list._list[2].sort, "code")


def test_segment_list_4():
    """Same-sort segments occupied in descending address order end up as a single segment."""
    seg_list = SegmentList()

    seg_list.occupy(5, 5, "code")
    seg_list.occupy(4, 1, "code")
    seg_list.occupy(2, 2, "code")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[0].end, 10)


def test_segment_list_5():
    """Overwriting the middle segment with the neighbours' sort collapses three segments into one."""
    seg_list = SegmentList()

    seg_list.occupy(5, 5, "data")
    seg_list.occupy(4, 1, "code")
    seg_list.occupy(2, 2, "data")

    nose.tools.assert_equal(len(seg_list), 3)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[2].end, 10)

    seg_list.occupy(3, 2, "data")

    nose.tools.assert_equal(len(seg_list), 1)
    nose.tools.assert_equal(seg_list._list[0].start, 2)
    nose.tools.assert_equal(seg_list._list[0].end, 10)


def test_segment_list_6():
    """A later segment of another sort that overlaps the head of an existing one truncates the existing one."""
    seg_list = SegmentList()

    seg_list.occupy(10, 20, "code")
    seg_list.occupy(9, 2, "data")

    nose.tools.assert_equal(len(seg_list), 2)
    nose.tools.assert_equal(seg_list._list[0].start, 9)
    nose.tools.assert_equal(seg_list._list[0].end, 11)
    nose.tools.assert_equal(seg_list._list[0].sort, 'data')

    nose.tools.assert_equal(seg_list._list[1].start, 11)
    nose.tools.assert_equal(seg_list._list[1].end, 30)
    nose.tools.assert_equal(seg_list._list[1].sort, 'code')


#
# Serialization
#

def test_serialization_cfgnode():
    """Serialize the entry CFGNode and check that parsing it back preserves addr, size and block_id."""
    path = os.path.join(test_location, "x86_64", "fauxware")
    proj = angr.Project(path, auto_load_libs=False)

    cfg = proj.analyses.CFGFast()
    # the first node
    node = cfg.model.get_any_node(proj.entry)
    nose.tools.assert_is_not_none(node)

    b = node.serialize()
    nose.tools.assert_greater(len(b), 0)
    new_node = CFGNode.parse(b)
    nose.tools.assert_equal(new_node.addr, node.addr)
    nose.tools.assert_equal(new_node.size, node.size)
    nose.tools.assert_equal(new_node.block_id, node.block_id)


def test_serialization_cfgfast():
    """Serialize a whole CFG model and check that a model parsed into a second project matches it."""
    path = os.path.join(test_location, "x86_64", "fauxware")
    proj1 = angr.Project(path, auto_load_libs=False)
    proj2 = angr.Project(path, auto_load_libs=False)

    cfg = proj1.analyses.CFGFast()
    # parse the entire graph
    b = cfg.model.serialize()
    nose.tools.assert_greater(len(b), 0)

    # simulate importing a cfg from another tool
    cfg_model = CFGModel.parse(b, cfg_manager=proj2.kb.cfgs)

    nose.tools.assert_equal(len(cfg_model.graph.nodes), len(cfg.graph.nodes))
    nose.tools.assert_equal(len(cfg_model.graph.edges), len(cfg.graph.edges))

    n1 = cfg.model.get_any_node(proj1.entry)
    n2 = cfg_model.get_any_node(proj1.entry)
    nose.tools.assert_equal(n1, n2)


#
# CFG instance copy
#

def test_cfg_copy():
    """Check that `copy()` yields equal attributes but distinct model, graph and segment-list objects."""
    path = os.path.join(test_location, "cgc", "CADET_00002")
    proj = angr.Project(path)

    cfg = proj.analyses.CFGFast()
    cfg_copy = cfg.copy()
    for attribute in cfg_copy.__dict__:
        # these three are compared by identity below instead of by equality
        if attribute in ['_graph', '_seg_list', '_model']:
            continue
        nose.tools.assert_equal(getattr(cfg, attribute), getattr(cfg_copy, attribute))

    nose.tools.assert_not_equal(id(cfg.model), id(cfg_copy.model))
    nose.tools.assert_not_equal(id(cfg.model.graph), id(cfg_copy.model.graph))
    nose.tools.assert_not_equal(id(cfg._seg_list), id(cfg_copy._seg_list))


#
# Alignment bytes
#

def test_cfg_0_pe_msvc_debug_nocc():
    """Check that alignment bytes in an MSVC debug build are marked as "alignment" and not as a function."""
    filename = os.path.join('windows', 'msvc_cfg_0_debug.exe')
    proj = angr.Project(os.path.join(test_location, 'x86_64', filename), auto_load_libs=False)
    cfg = proj.analyses.CFGFast()

    # make sure the address is marked as alignments
    # NOTE(review): the sort is queried at 0x140016583 while the function-list check below uses 0x140015683.
    # The two addresses differ (the original comment named 0x140015683); confirm which one is intended.
    sort = cfg._seg_list.occupied_by_sort(0x140016583)
    nose.tools.assert_equal(sort, "alignment", "Address 0x140016583 is not marked as alignment. The CC detection is "
                                               "probably failing.")

    nose.tools.assert_not_in(0x140015683, cfg.kb.functions)


#
# Indirect jump resolvers
#

# For test cases for jump table resolver, please refer to test_jumptables.py

def test_resolve_x86_elf_pic_plt():
    """Check that the PLT stub of `puts` in a PIE binary resolves to the `puts` symbol and its return targets."""
    path = os.path.join(test_location, 'i386', 'fauxware_pie')
    proj = angr.Project(path, load_options={'auto_load_libs': False})

    cfg = proj.analyses.CFGFast()

    # puts
    puts_node = cfg.model.get_any_node(0x4005b0)
    nose.tools.assert_is_not_none(puts_node)

    # there should be only one successor, which jumps to SimProcedure puts
    nose.tools.assert_equal(len(puts_node.successors), 1)
    puts_successor = puts_node.successors[0]
    nose.tools.assert_equal(puts_successor.addr, proj.loader.find_symbol('puts').rebased_addr)

    # the SimProcedure puts should have exactly three successors, which are all return targets
    nose.tools.assert_equal(len(puts_successor.successors), 3)
    simputs_successor = puts_successor.successors
    return_targets = set(a.addr for a in simputs_successor)
    nose.tools.assert_equal(return_targets, { 0x400800, 0x40087e, 0x4008b6 })


#
# Function names
#

def test_function_names_for_unloaded_libraries():
    """Check that both PLT stubs and their targets are named after the imported symbol."""
    path = os.path.join(test_location, 'i386', 'fauxware_pie')
    proj = angr.Project(path, load_options={'auto_load_libs': False})

    cfg = proj.analyses.CFGFast()

    # PLT functions get a 'plt_' prefix so that they can be told apart from their targets
    function_names = [ f.name if not f.is_plt else 'plt_' + f.name for f in cfg.functions.values() ]

    nose.tools.assert_in('plt_puts', function_names)
    nose.tools.assert_in('plt_read', function_names)
    nose.tools.assert_in('plt___stack_chk_fail', function_names)
    nose.tools.assert_in('plt_exit', function_names)
    nose.tools.assert_in('puts', function_names)
    nose.tools.assert_in('read', function_names)
    nose.tools.assert_in('__stack_chk_fail', function_names)
    nose.tools.assert_in('exit', function_names)


#
624# Basic blocks 625# 626 627def test_block_instruction_addresses_armhf(): 628 path = os.path.join(test_location, 'armhf', 'fauxware') 629 proj = angr.Project(path, auto_load_libs=False) 630 631 cfg = proj.analyses.CFGFast() 632 633 main_func = cfg.kb.functions['main'] 634 635 # all instruction addresses of the block must be odd 636 block = next((b for b in main_func.blocks if b.addr == main_func.addr)) 637 638 nose.tools.assert_equal(len(block.instruction_addrs), 12) 639 for instr_addr in block.instruction_addrs: 640 nose.tools.assert_true(instr_addr % 2 == 1) 641 642 main_node = cfg.model.get_any_node(main_func.addr) 643 nose.tools.assert_is_not_none(main_node) 644 nose.tools.assert_equal(len(main_node.instruction_addrs), 12) 645 for instr_addr in main_node.instruction_addrs: 646 nose.tools.assert_true(instr_addr % 2 == 1) 647 648# 649# Tail-call optimization detection 650# 651 652def test_tail_call_optimization_detection_armel(): 653 654 # GitHub issue #1286 655 656 path = os.path.join(test_location, 'armel', 'Nucleo_read_hyperterminal-stripped.elf') 657 proj = angr.Project(path, auto_load_libs=False) 658 659 cfg = proj.analyses.CFGFast(resolve_indirect_jumps=True, 660 force_complete_scan=False, 661 normalize=True, 662 symbols=False, 663 detect_tail_calls=True 664 ) 665 666 all_func_addrs = set(cfg.functions.keys()) 667 nose.tools.assert_not_in(0x80010b5, all_func_addrs, "0x80010b5 is inside Reset_Handler().") 668 nose.tools.assert_not_in(0x8003ef9, all_func_addrs, "0x8003ef9 is inside memcpy().") 669 nose.tools.assert_not_in(0x8008419, all_func_addrs, "0x8008419 is inside __mulsf3().") 670 671 # Functions that are jumped to from tail-calls 672 tail_call_funcs = [ 0x8002bc1, 0x80046c1, 0x8000281, 0x8001bdb, 0x8002839, 0x80037ad, 0x8002c09, 0x8004165, 673 0x8004be1, 0x8002eb1 ] 674 for member in tail_call_funcs: 675 nose.tools.assert_in(member, all_func_addrs) 676 677 # also test for tailcall return addresses 678 679 # mapping of return blocks to return addrs 
that are the actual callers of certain tail-calls endpoints 680 tail_call_return_addrs = {0x8002bd9: [0x800275f], # 0x8002bc1 681 0x80046d7: [0x800275f], # 0x80046c1 682 0x80046ed: [0x800275f], # 0x80046c1 683 0x8001be7: [0x800068d, 0x8000695], # 0x8001bdb ?? 684 0x800284d: [0x800028b, 0x80006e1, 0x80006e7], # 0x8002839 685 0x80037f5: [0x800270b, 0x8002733, 0x8002759, 0x800098f, 0x8000997], # 0x80037ad 686 0x80037ef: [0x800270b, 0x8002733, 0x8002759, 0x800098f, 0x8000997], # 0x80037ad 687 0x8002cc9: [0x8002d3b, 0x8002b99, 0x8002e9f, 0x80041ad, 688 0x8004c87, 0x8004d35, 0x8002efb, 0x8002be9, 689 0x80046eb, 0x800464f, 0x8002a09, 0x800325f, 690 0x80047c1], # 0x8002c09 691 0x8004183: [0x8002713], # 0x8004165 692 0x8004c31: [0x8002713], # 0x8004be1 693 0x8004c69: [0x8002713], # 0x8004be1 694 0x8002ef1: [0x800273b]} # 0x8002eb1 695 696 # check all expected return addrs are present 697 for returning_block_addr, expected_return_addrs in tail_call_return_addrs.items(): 698 returning_block = cfg.model.get_any_node(returning_block_addr) 699 return_block_addrs = [rb.addr for rb in cfg.model.get_successors(returning_block)] 700 msg = "%x: unequal sizes of expected_addrs [%d] and return_block_addrs [%d]" % \ 701 (returning_block_addr, len(expected_return_addrs), len(return_block_addrs)) 702 nose.tools.assert_equal(len(return_block_addrs), len(expected_return_addrs), msg) 703 for expected_addr in expected_return_addrs: 704 msg = "expected retaddr %x not found for returning_block %x" % \ 705 (expected_addr, returning_block_addr) 706 nose.tools.assert_in(expected_addr, return_block_addrs, msg) 707 708# 709# Incorrect function-leading blocks merging 710# 711 712def test_function_leading_blocks_merging(): 713 714 # GitHub issue #1312 715 716 path = os.path.join(test_location, 'armel', 'Nucleo_read_hyperterminal-stripped.elf') 717 proj = angr.Project(path, arch=archinfo.ArchARMCortexM(), auto_load_libs=False) 718 719 cfg = proj.analyses.CFGFast(resolve_indirect_jumps=True, 720 
force_complete_scan=True, 721 normalize=True, 722 symbols=False, 723 detect_tail_calls=True 724 ) 725 726 nose.tools.assert_in(0x8000799, cfg.kb.functions, "Function 0x8000799 does not exist.") 727 nose.tools.assert_not_in(0x800079b, cfg.kb.functions, "Function 0x800079b does not exist.") 728 nose.tools.assert_not_in(0x800079b, cfg.kb.functions[0x8000799].block_addrs_set, 729 "Block 0x800079b is found, but it should not exist.") 730 nose.tools.assert_in(0x8000799, cfg.kb.functions[0x8000799].block_addrs_set, 731 "Block 0x8000799 is not found inside function 0x8000799.") 732 nose.tools.assert_equal(next(iter(b for b in cfg.kb.functions[0x8000799].blocks if b.addr == 0x8000799)).size, 6, 733 "Block 0x800079b has an incorrect size.") 734 735 736# 737# Blanket 738# 739 740def test_blanket_fauxware(): 741 742 path = os.path.join(test_location, 'x86_64', 'fauxware') 743 proj = angr.Project(path, auto_load_libs=False) 744 745 cfg = proj.analyses.CFGFast() 746 747 cfb = proj.analyses.CFBlanket(kb=cfg.kb) 748 749 # it should raise a key error when calling floor_addr on address 0 because nothing is mapped there 750 nose.tools.assert_raises(KeyError, cfb.floor_addr, 0) 751 # an instruction (or a block) starts at 0x400580 752 nose.tools.assert_equal(cfb.floor_addr(0x400581), 0x400580) 753 # a block ends at 0x4005a9 (exclusive) 754 nose.tools.assert_equal(cfb.ceiling_addr(0x400581), 0x4005a9) 755 756 757# 758# Data references 759# 760 761def test_data_references_x86_64(): 762 763 path = os.path.join(test_location, 'x86_64', 'fauxware') 764 proj = angr.Project(path, auto_load_libs=False) 765 766 cfg = proj.analyses.CFGFast(data_references=True) 767 768 memory_data = cfg.memory_data 769 # There is no code reference 770 code_ref_count = len([d for d in memory_data.values() if d.sort == MemoryDataSort.CodeReference]) 771 nose.tools.assert_greater_equal(code_ref_count, 0, msg="There should be no code reference.") 772 773 # There are at least 2 pointer arrays 774 ptr_array_count = 
len([d for d in memory_data.values() if d.sort == MemoryDataSort.PointerArray]) 775 nose.tools.assert_greater(ptr_array_count, 2, msg="Missing some pointer arrays.") 776 777 nose.tools.assert_in(0x4008d0, memory_data) 778 sneaky_str = memory_data[0x4008d0] 779 nose.tools.assert_equal(sneaky_str.sort, "string") 780 nose.tools.assert_equal(sneaky_str.content, b"SOSNEAKY") 781 782 783def test_data_references_mipsel(): 784 785 path = os.path.join(test_location, 'mipsel', 'fauxware') 786 proj = angr.Project(path, auto_load_libs=False) 787 788 cfg = proj.analyses.CFGFast(data_references=True) 789 790 memory_data = cfg.memory_data 791 # There is no code reference 792 code_ref_count = len([d for d in memory_data.values() if d.sort == MemoryDataSort.CodeReference]) 793 nose.tools.assert_greater_equal(code_ref_count, 0, msg="There should be no code reference.") 794 795 # There are at least 2 pointer arrays 796 ptr_array_count = len([d for d in memory_data.values() if d.sort == MemoryDataSort.PointerArray]) 797 nose.tools.assert_greater_equal(ptr_array_count, 1, msg="Missing some pointer arrays.") 798 799 nose.tools.assert_in(0x400c00, memory_data) 800 sneaky_str = memory_data[0x400c00] 801 nose.tools.assert_equal(sneaky_str.sort, "string") 802 nose.tools.assert_equal(sneaky_str.content, b"SOSNEAKY") 803 804 nose.tools.assert_in(0x400c0c, memory_data) 805 str_ = memory_data[0x400c0c] 806 nose.tools.assert_equal(str_.sort, "string") 807 nose.tools.assert_equal(str_.content, b"Welcome to the admin console, trusted user!") 808 809 nose.tools.assert_in(0x400c38, memory_data) 810 str_ = memory_data[0x400c38] 811 nose.tools.assert_equal(str_.sort, "string") 812 nose.tools.assert_equal(str_.content, b"Go away!") 813 814 nose.tools.assert_in(0x400c44, memory_data) 815 str_ = memory_data[0x400c44] 816 nose.tools.assert_equal(str_.sort, "string") 817 nose.tools.assert_equal(str_.content, b"Username: ") 818 819 nose.tools.assert_in(0x400c50, memory_data) 820 str_ = memory_data[0x400c50] 
821 nose.tools.assert_equal(str_.sort, "string") 822 nose.tools.assert_equal(str_.content, b"Password: ") 823 824 825# 826# CFG with patches 827# 828 829def test_cfg_with_patches(): 830 831 path = os.path.join(test_location, 'x86_64', 'fauxware') 832 proj = angr.Project(path, auto_load_libs=False) 833 834 cfg = proj.analyses.CFGFast() 835 auth_func = cfg.functions['authenticate'] 836 auth_func_addr = auth_func.addr 837 838 # Take the authenticate function and add a retn patch for its very first block 839 kb = angr.KnowledgeBase(proj) 840 kb.patches.add_patch(auth_func_addr, b"\xc3") 841 842 # with this patch, there should only be one block with one instruction in authenticate() 843 _ = proj.analyses.CFGFast(kb=kb, use_patches=True) 844 patched_func = kb.functions['authenticate'] 845 nose.tools.assert_equal(len(patched_func.block_addrs_set), 1) 846 block = patched_func._get_block(auth_func_addr) 847 nose.tools.assert_equal(len(block.instruction_addrs), 1) 848 849 # let's try to patch the second instruction of that function to ret 850 kb = angr.KnowledgeBase(proj) 851 kb.patches.add_patch(auth_func._get_block(auth_func_addr).instruction_addrs[1], b"\xc3") 852 853 # with this patch, there should only be one block with two instructions in authenticate() 854 _ = proj.analyses.CFGFast(kb=kb, use_patches=True) 855 patched_func = kb.functions['authenticate'] 856 nose.tools.assert_equal(len(patched_func.block_addrs_set), 1) 857 block = patched_func._get_block(auth_func_addr) 858 nose.tools.assert_equal(len(block.instruction_addrs), 2) 859 860 # finally, if we generate a new CFG on a KB without any patch, we should still see the normal function (with 10 861 # blocks) 862 kb = angr.KnowledgeBase(proj) 863 _ = proj.analyses.CFGFast(kb=kb, use_patches=True) 864 not_patched_func = kb.functions['authenticate'] 865 nose.tools.assert_equal(len(not_patched_func.block_addrs_set), 10) 866 867 868def test_unresolvable_targets(): 869 870 path = os.path.join(test_location, 'cgc', 
'CADET_00002') 871 proj = angr.Project(path) 872 873 proj.analyses.CFGFast(normalize=True) 874 func = proj.kb.functions[0x080489E0] 875 876 true_endpoint_addrs = {0x8048bbc, 0x8048af5, 0x8048b5c, 0x8048a41, 0x8048aa8} 877 endpoint_addrs = {node.addr for node in func.endpoints} 878 nose.tools.assert_equal(len(endpoint_addrs.symmetric_difference(true_endpoint_addrs)), 0) 879 880 881def test_indirect_jump_to_outside(): 882 883 # an indirect jump might be jumping to outside as well 884 path = os.path.join(test_location, "mipsel", "libndpi.so.4.0.0") 885 proj = angr.Project(path, auto_load_libs=False) 886 887 cfg = proj.analyses.CFGFast() 888 889 nose.tools.assert_equal(len(list(cfg.functions[0x404ee4].blocks)), 3) 890 nose.tools.assert_equal(set(ep.addr for ep in cfg.functions[0x404ee4].endpoints), { 0x404f00, 0x404f08 }) 891 892 893def test_plt_stub_has_one_jumpout_site(): 894 895 # each PLT stub must have exactly one jumpout site 896 path = os.path.join(test_location, "x86_64", "1after909") 897 proj = angr.Project(path, auto_load_libs=False) 898 cfg = proj.analyses.CFGFast() 899 900 for func in cfg.kb.functions.values(): 901 if func.is_plt: 902 assert len(func.jumpout_sites) == 1 903 904 905def test_generate_special_info(): 906 907 path = os.path.join(test_location, "mipsel", "fauxware") 908 proj = angr.Project(path, auto_load_libs=False) 909 910 cfg = proj.analyses.CFGFast() 911 912 nose.tools.assert_true(any(func.info for func in cfg.functions.values())) 913 nose.tools.assert_equal(cfg.functions['main'].info['gp'], 0x418ca0) 914 915 916def test_load_from_shellcode(): 917 918 proj = angr.load_shellcode('loop: dec ecx; jnz loop; ret', 'x86') 919 cfg = proj.analyses.CFGFast() 920 921 nose.tools.assert_equal(len(cfg.model.nodes()), 2) 922 923 924def run_all(): 925 926 g = globals() 927 segmentlist_tests = [ v for k, v in g.items() if k.startswith("test_segment_list_") and hasattr(v, "__call__")] 928 929 for func in segmentlist_tests: 930 print(func.__name__) 931 func() 
932 933 test_serialization_cfgnode() 934 test_serialization_cfgfast() 935 936 for args in test_cfg_0(): 937 print(args[0].__name__) 938 args[0](*args[1:]) 939 940 for args in test_cfg_0_pe(): 941 print(args[0].__name__) 942 args[0](*args[1:]) 943 944 for args in test_fauxware(): 945 print(args[0].__name__) 946 args[0](*args[1:]) 947 948 for args in test_cfg_loop_unrolling(): 949 print(args[0].__name__) 950 args[0](*args[1:]) 951 952 for args in test_cfg_switches(): 953 args[0](*args[1:]) 954 955 test_resolve_x86_elf_pic_plt() 956 test_function_names_for_unloaded_libraries() 957 test_block_instruction_addresses_armhf() 958 test_tail_call_optimization_detection_armel() 959 test_blanket_fauxware() 960 test_data_references_x86_64() 961 test_data_references_mipsel() 962 test_function_leading_blocks_merging() 963 test_cfg_with_patches() 964 test_indirect_jump_to_outside() 965 test_generate_special_info() 966 test_plt_stub_has_one_jumpout_site() 967 test_load_from_shellcode() 968 969 970def main(): 971 if len(sys.argv) > 1: 972 g = globals().copy() 973 974 r = g['test_' + sys.argv[1]]() 975 976 if r is not None: 977 for func_and_args in r: 978 func, args = func_and_args[0], func_and_args[1:] 979 func(*args) 980 else: 981 run_all() 982 983if __name__ == "__main__": 984 # logging.getLogger('angr.analyses.cfg.cfg_fast').setLevel(logging.DEBUG) 985 main() 986