1 /* Tail merging for gimple. 2 Copyright (C) 2011-2018 Free Software Foundation, Inc. 3 Contributed by Tom de Vries (tom@codesourcery.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* Pass overview. 22 23 24 MOTIVATIONAL EXAMPLE 25 26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at 27 28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) 29 { 30 struct FILED.1638 * fpD.2605; 31 charD.1 fileNameD.2604[1000]; 32 intD.0 D.3915; 33 const charD.1 * restrict outputFileName.0D.3914; 34 35 # BLOCK 2 freq:10000 36 # PRED: ENTRY [100.0%] (fallthru,exec) 37 # PT = nonlocal { D.3926 } (restr) 38 outputFileName.0D.3914_3 39 = (const charD.1 * restrict) outputFileNameD.2600_2(D); 40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> 41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); 44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13> 45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1); 48 if (D.3915_4 == 0) 49 goto <bb 3>; 50 else 51 goto <bb 4>; 52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) 53 54 # BLOCK 3 freq:1000 55 # PRED: 2 [10.0%] (true,exec) 56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14> 57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 59 freeD.898 (ctxD.2601_5(D)); 60 goto <bb 7>; 61 # SUCC: 7 [100.0%] (fallthru,exec) 62 63 # BLOCK 4 freq:9000 64 # PRED: 2 [90.0%] (false,exec) 65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14> 66 # PT = nonlocal escaped 67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); 70 if (fpD.2605_8 == 0B) 71 goto <bb 5>; 72 else 73 goto <bb 6>; 74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) 75 76 # BLOCK 5 freq:173 77 # PRED: 4 [1.9%] (true,exec) 78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16> 79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 81 freeD.898 (ctxD.2601_5(D)); 82 goto <bb 7>; 83 # SUCC: 7 [100.0%] (fallthru,exec) 84 85 # BLOCK 6 freq:8827 86 # PRED: 4 [98.1%] (false,exec) 87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16> 88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); 91 # SUCC: 7 [100.0%] (fallthru,exec) 92 93 # BLOCK 7 freq:10000 94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) 95 6 [100.0%] (fallthru,exec) 96 # PT = nonlocal null 97 98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> 99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), 100 .MEMD.3923_18(6)> 101 # VUSE <.MEMD.3923_11> 102 return ctxD.2601_1; 103 # SUCC: EXIT [100.0%] 104 } 105 106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the 107 same successors, and the same operations. 108 109 110 CONTEXT 111 112 A technique called tail merging (or cross jumping) can fix the example 113 above. For a block, we look for common code at the end (the tail) of the 114 predecessor blocks, and insert jumps from one block to the other. 115 The example is a special case for tail merging, in that 2 whole blocks 116 can be merged, rather than just the end parts of it. 117 We currently only focus on whole block merging, so in that sense 118 calling this pass tail merge is a bit of a misnomer. 119 120 We distinguish 2 kinds of situations in which blocks can be merged: 121 - same operations, same predecessors. The successor edges coming from one 122 block are redirected to come from the other block. 123 - same operations, same successors. The predecessor edges entering one block 124 are redirected to enter the other block. Note that this operation might 125 involve introducing phi operations. 126 127 For efficient implementation, we would like to value numbers the blocks, and 128 have a comparison operator that tells us whether the blocks are equal. 129 Besides being runtime efficient, block value numbering should also abstract 130 from irrelevant differences in order of operations, much like normal value 131 numbering abstracts from irrelevant order of operations. 132 133 For the first situation (same_operations, same predecessors), normal value 134 numbering fits well. We can calculate a block value number based on the 135 value numbers of the defs and vdefs. 136 137 For the second situation (same operations, same successors), this approach 138 doesn't work so well. We can illustrate this using the example. The calls 139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will 140 remain different in value numbering, since they represent different memory 141 states. So the resulting vdefs of the frees will be different in value 142 numbering, so the block value numbers will be different. 143 144 The reason why we call the blocks equal is not because they define the same 145 values, but because uses in the blocks use (possibly different) defs in the 146 same way. To be able to detect this efficiently, we need to do some kind of 147 reverse value numbering, meaning number the uses rather than the defs, and 148 calculate a block value number based on the value number of the uses. 149 Ideally, a block comparison operator will also indicate which phis are needed 150 to merge the blocks. 151 152 For the moment, we don't do block value numbering, but we do insn-by-insn 153 matching, using scc value numbers to match operations with results, and 154 structural comparison otherwise, while ignoring vop mismatches. 155 156 157 IMPLEMENTATION 158 159 1. The pass first determines all groups of blocks with the same successor 160 blocks. 161 2. Within each group, it tries to determine clusters of equal basic blocks. 162 3. The clusters are applied. 163 4. The same successor groups are updated. 164 5. This process is repeated from 2 onwards, until no more changes. 165 166 167 LIMITATIONS/TODO 168 169 - block only 170 - handles only 'same operations, same successors'. 171 It handles same predecessors as a special subcase though. 172 - does not implement the reverse value numbering and block value numbering. 173 - improve memory allocation: use garbage collected memory, obstacks, 174 allocpools where appropriate. 175 - no insertion of gimple_reg phis, We only introduce vop-phis. 176 - handle blocks with gimple_reg phi_nodes. 177 178 179 PASS PLACEMENT 180 This 'pass' is not a stand-alone gimple pass, but runs as part of 181 pass_pre, in order to share the value numbering. 182 183 184 SWITCHES 185 186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */ 187 188 #include "config.h" 189 #include "system.h" 190 #include "coretypes.h" 191 #include "backend.h" 192 #include "tree.h" 193 #include "gimple.h" 194 #include "cfghooks.h" 195 #include "tree-pass.h" 196 #include "ssa.h" 197 #include "fold-const.h" 198 #include "trans-mem.h" 199 #include "cfganal.h" 200 #include "cfgcleanup.h" 201 #include "gimple-iterator.h" 202 #include "tree-cfg.h" 203 #include "tree-into-ssa.h" 204 #include "params.h" 205 #include "tree-ssa-sccvn.h" 206 #include "cfgloop.h" 207 #include "tree-eh.h" 208 #include "tree-cfgcleanup.h" 209 210 const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE; 211 212 /* Describes a group of bbs with the same successors. The successor bbs are 213 cached in succs, and the successor edge flags are cached in succ_flags. 214 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags, 215 it's marked in inverse. 216 Additionally, the hash value for the struct is cached in hashval, and 217 in_worklist indicates whether it's currently part of worklist. */ 218 219 struct same_succ : pointer_hash <same_succ> 220 { 221 /* The bbs that have the same successor bbs. */ 222 bitmap bbs; 223 /* The successor bbs. */ 224 bitmap succs; 225 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for 226 bb. */ 227 bitmap inverse; 228 /* The edge flags for each of the successor bbs. */ 229 vec<int> succ_flags; 230 /* Indicates whether the struct is currently in the worklist. */ 231 bool in_worklist; 232 /* The hash value of the struct. */ 233 hashval_t hashval; 234 235 /* hash_table support. */ 236 static inline hashval_t hash (const same_succ *); 237 static int equal (const same_succ *, const same_succ *); 238 static void remove (same_succ *); 239 }; 240 241 /* hash routine for hash_table support, returns hashval of E. */ 242 243 inline hashval_t 244 same_succ::hash (const same_succ *e) 245 { 246 return e->hashval; 247 } 248 249 /* A group of bbs where 1 bb from bbs can replace the other bbs. */ 250 251 struct bb_cluster 252 { 253 /* The bbs in the cluster. */ 254 bitmap bbs; 255 /* The preds of the bbs in the cluster. */ 256 bitmap preds; 257 /* Index in all_clusters vector. */ 258 int index; 259 /* The bb to replace the cluster with. */ 260 basic_block rep_bb; 261 }; 262 263 /* Per bb-info. */ 264 265 struct aux_bb_info 266 { 267 /* The number of non-debug statements in the bb. */ 268 int size; 269 /* The same_succ that this bb is a member of. */ 270 same_succ *bb_same_succ; 271 /* The cluster that this bb is a member of. */ 272 bb_cluster *cluster; 273 /* The vop state at the exit of a bb. This is shortlived data, used to 274 communicate data between update_block_by and update_vuses. */ 275 tree vop_at_exit; 276 /* The bb that either contains or is dominated by the dependencies of the 277 bb. */ 278 basic_block dep_bb; 279 }; 280 281 /* Macros to access the fields of struct aux_bb_info. */ 282 283 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size) 284 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ) 285 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster) 286 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) 287 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) 288 289 /* Valueization helper querying the VN lattice. */ 290 291 static tree 292 tail_merge_valueize (tree name) 293 { 294 if (TREE_CODE (name) == SSA_NAME 295 && has_VN_INFO (name)) 296 { 297 tree tem = VN_INFO (name)->valnum; 298 if (tem != VN_TOP) 299 return tem; 300 } 301 return name; 302 } 303 304 /* Returns true if the only effect a statement STMT has, is to define locally 305 used SSA_NAMEs. */ 306 307 static bool 308 stmt_local_def (gimple *stmt) 309 { 310 basic_block bb, def_bb; 311 imm_use_iterator iter; 312 use_operand_p use_p; 313 tree val; 314 def_operand_p def_p; 315 316 if (gimple_vdef (stmt) != NULL_TREE 317 || gimple_has_side_effects (stmt) 318 || gimple_could_trap_p_1 (stmt, false, false) 319 || gimple_vuse (stmt) != NULL_TREE 320 /* Copied from tree-ssa-ifcombine.c:bb_no_side_effects_p(): 321 const calls don't match any of the above, yet they could 322 still have some side-effects - they could contain 323 gimple_could_trap_p statements, like floating point 324 exceptions or integer division by zero. See PR70586. 325 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p 326 should handle this. */ 327 || is_gimple_call (stmt)) 328 return false; 329 330 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF); 331 if (def_p == NULL) 332 return false; 333 334 val = DEF_FROM_PTR (def_p); 335 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME) 336 return false; 337 338 def_bb = gimple_bb (stmt); 339 340 FOR_EACH_IMM_USE_FAST (use_p, iter, val) 341 { 342 if (is_gimple_debug (USE_STMT (use_p))) 343 continue; 344 bb = gimple_bb (USE_STMT (use_p)); 345 if (bb == def_bb) 346 continue; 347 348 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI 349 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb) 350 continue; 351 352 return false; 353 } 354 355 return true; 356 } 357 358 /* Let GSI skip forwards over local defs. */ 359 360 static void 361 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi) 362 { 363 gimple *stmt; 364 365 while (true) 366 { 367 if (gsi_end_p (*gsi)) 368 return; 369 stmt = gsi_stmt (*gsi); 370 if (!stmt_local_def (stmt)) 371 return; 372 gsi_next_nondebug (gsi); 373 } 374 } 375 376 /* VAL1 and VAL2 are either: 377 - uses in BB1 and BB2, or 378 - phi alternatives for BB1 and BB2. 379 Return true if the uses have the same gvn value. */ 380 381 static bool 382 gvn_uses_equal (tree val1, tree val2) 383 { 384 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE); 385 386 if (val1 == val2) 387 return true; 388 389 if (tail_merge_valueize (val1) != tail_merge_valueize (val2)) 390 return false; 391 392 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1)) 393 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2))); 394 } 395 396 /* Prints E to FILE. */ 397 398 static void 399 same_succ_print (FILE *file, const same_succ *e) 400 { 401 unsigned int i; 402 bitmap_print (file, e->bbs, "bbs:", "\n"); 403 bitmap_print (file, e->succs, "succs:", "\n"); 404 bitmap_print (file, e->inverse, "inverse:", "\n"); 405 fprintf (file, "flags:"); 406 for (i = 0; i < e->succ_flags.length (); ++i) 407 fprintf (file, " %x", e->succ_flags[i]); 408 fprintf (file, "\n"); 409 } 410 411 /* Prints same_succ VE to VFILE. */ 412 413 inline int 414 ssa_same_succ_print_traverse (same_succ **pe, FILE *file) 415 { 416 const same_succ *e = *pe; 417 same_succ_print (file, e); 418 return 1; 419 } 420 421 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */ 422 423 static void 424 update_dep_bb (basic_block use_bb, tree val) 425 { 426 basic_block dep_bb; 427 428 /* Not a dep. */ 429 if (TREE_CODE (val) != SSA_NAME) 430 return; 431 432 /* Skip use of global def. */ 433 if (SSA_NAME_IS_DEFAULT_DEF (val)) 434 return; 435 436 /* Skip use of local def. */ 437 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val)); 438 if (dep_bb == use_bb) 439 return; 440 441 if (BB_DEP_BB (use_bb) == NULL 442 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb))) 443 BB_DEP_BB (use_bb) = dep_bb; 444 } 445 446 /* Update BB_DEP_BB, given the dependencies in STMT. */ 447 448 static void 449 stmt_update_dep_bb (gimple *stmt) 450 { 451 ssa_op_iter iter; 452 use_operand_p use; 453 454 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 455 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); 456 } 457 458 /* Calculates hash value for same_succ VE. */ 459 460 static hashval_t 461 same_succ_hash (const same_succ *e) 462 { 463 inchash::hash hstate (bitmap_hash (e->succs)); 464 int flags; 465 unsigned int i; 466 unsigned int first = bitmap_first_set_bit (e->bbs); 467 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first); 468 int size = 0; 469 gimple *stmt; 470 tree arg; 471 unsigned int s; 472 bitmap_iterator bs; 473 474 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb); 475 !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) 476 { 477 stmt = gsi_stmt (gsi); 478 stmt_update_dep_bb (stmt); 479 if (stmt_local_def (stmt)) 480 continue; 481 size++; 482 483 hstate.add_int (gimple_code (stmt)); 484 if (is_gimple_assign (stmt)) 485 hstate.add_int (gimple_assign_rhs_code (stmt)); 486 if (!is_gimple_call (stmt)) 487 continue; 488 if (gimple_call_internal_p (stmt)) 489 hstate.add_int (gimple_call_internal_fn (stmt)); 490 else 491 { 492 inchash::add_expr (gimple_call_fn (stmt), hstate); 493 if (gimple_call_chain (stmt)) 494 inchash::add_expr (gimple_call_chain (stmt), hstate); 495 } 496 for (i = 0; i < gimple_call_num_args (stmt); i++) 497 { 498 arg = gimple_call_arg (stmt, i); 499 arg = tail_merge_valueize (arg); 500 inchash::add_expr (arg, hstate); 501 } 502 } 503 504 hstate.add_int (size); 505 BB_SIZE (bb) = size; 506 507 hstate.add_int (bb->loop_father->num); 508 509 for (i = 0; i < e->succ_flags.length (); ++i) 510 { 511 flags = e->succ_flags[i]; 512 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 513 hstate.add_int (flags); 514 } 515 516 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs) 517 { 518 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx; 519 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s)); 520 !gsi_end_p (gsi); 521 gsi_next (&gsi)) 522 { 523 gphi *phi = gsi.phi (); 524 tree lhs = gimple_phi_result (phi); 525 tree val = gimple_phi_arg_def (phi, n); 526 527 if (virtual_operand_p (lhs)) 528 continue; 529 update_dep_bb (bb, val); 530 } 531 } 532 533 return hstate.end (); 534 } 535 536 /* Returns true if E1 and E2 have 2 successors, and if the successor flags 537 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for 538 the other edge flags. */ 539 540 static bool 541 inverse_flags (const same_succ *e1, const same_succ *e2) 542 { 543 int f1a, f1b, f2a, f2b; 544 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 545 546 if (e1->succ_flags.length () != 2) 547 return false; 548 549 f1a = e1->succ_flags[0]; 550 f1b = e1->succ_flags[1]; 551 f2a = e2->succ_flags[0]; 552 f2b = e2->succ_flags[1]; 553 554 if (f1a == f2a && f1b == f2b) 555 return false; 556 557 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); 558 } 559 560 /* Compares SAME_SUCCs E1 and E2. */ 561 562 int 563 same_succ::equal (const same_succ *e1, const same_succ *e2) 564 { 565 unsigned int i, first1, first2; 566 gimple_stmt_iterator gsi1, gsi2; 567 gimple *s1, *s2; 568 basic_block bb1, bb2; 569 570 if (e1 == e2) 571 return 1; 572 573 if (e1->hashval != e2->hashval) 574 return 0; 575 576 if (e1->succ_flags.length () != e2->succ_flags.length ()) 577 return 0; 578 579 if (!bitmap_equal_p (e1->succs, e2->succs)) 580 return 0; 581 582 if (!inverse_flags (e1, e2)) 583 { 584 for (i = 0; i < e1->succ_flags.length (); ++i) 585 if (e1->succ_flags[i] != e2->succ_flags[i]) 586 return 0; 587 } 588 589 first1 = bitmap_first_set_bit (e1->bbs); 590 first2 = bitmap_first_set_bit (e2->bbs); 591 592 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1); 593 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2); 594 595 if (BB_SIZE (bb1) != BB_SIZE (bb2)) 596 return 0; 597 598 if (bb1->loop_father != bb2->loop_father) 599 return 0; 600 601 gsi1 = gsi_start_nondebug_bb (bb1); 602 gsi2 = gsi_start_nondebug_bb (bb2); 603 gsi_advance_fw_nondebug_nonlocal (&gsi1); 604 gsi_advance_fw_nondebug_nonlocal (&gsi2); 605 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) 606 { 607 s1 = gsi_stmt (gsi1); 608 s2 = gsi_stmt (gsi2); 609 if (gimple_code (s1) != gimple_code (s2)) 610 return 0; 611 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) 612 return 0; 613 gsi_next_nondebug (&gsi1); 614 gsi_next_nondebug (&gsi2); 615 gsi_advance_fw_nondebug_nonlocal (&gsi1); 616 gsi_advance_fw_nondebug_nonlocal (&gsi2); 617 } 618 619 return 1; 620 } 621 622 /* Alloc and init a new SAME_SUCC. */ 623 624 static same_succ * 625 same_succ_alloc (void) 626 { 627 same_succ *same = XNEW (struct same_succ); 628 629 same->bbs = BITMAP_ALLOC (NULL); 630 same->succs = BITMAP_ALLOC (NULL); 631 same->inverse = BITMAP_ALLOC (NULL); 632 same->succ_flags.create (10); 633 same->in_worklist = false; 634 635 return same; 636 } 637 638 /* Delete same_succ E. */ 639 640 void 641 same_succ::remove (same_succ *e) 642 { 643 BITMAP_FREE (e->bbs); 644 BITMAP_FREE (e->succs); 645 BITMAP_FREE (e->inverse); 646 e->succ_flags.release (); 647 648 XDELETE (e); 649 } 650 651 /* Reset same_succ SAME. */ 652 653 static void 654 same_succ_reset (same_succ *same) 655 { 656 bitmap_clear (same->bbs); 657 bitmap_clear (same->succs); 658 bitmap_clear (same->inverse); 659 same->succ_flags.truncate (0); 660 } 661 662 static hash_table<same_succ> *same_succ_htab; 663 664 /* Array that is used to store the edge flags for a successor. */ 665 666 static int *same_succ_edge_flags; 667 668 /* Bitmap that is used to mark bbs that are recently deleted. */ 669 670 static bitmap deleted_bbs; 671 672 /* Bitmap that is used to mark predecessors of bbs that are 673 deleted. */ 674 675 static bitmap deleted_bb_preds; 676 677 /* Prints same_succ_htab to stderr. */ 678 679 extern void debug_same_succ (void); 680 DEBUG_FUNCTION void 681 debug_same_succ ( void) 682 { 683 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr); 684 } 685 686 687 /* Vector of bbs to process. */ 688 689 static vec<same_succ *> worklist; 690 691 /* Prints worklist to FILE. */ 692 693 static void 694 print_worklist (FILE *file) 695 { 696 unsigned int i; 697 for (i = 0; i < worklist.length (); ++i) 698 same_succ_print (file, worklist[i]); 699 } 700 701 /* Adds SAME to worklist. */ 702 703 static void 704 add_to_worklist (same_succ *same) 705 { 706 if (same->in_worklist) 707 return; 708 709 if (bitmap_count_bits (same->bbs) < 2) 710 return; 711 712 same->in_worklist = true; 713 worklist.safe_push (same); 714 } 715 716 /* Add BB to same_succ_htab. */ 717 718 static void 719 find_same_succ_bb (basic_block bb, same_succ **same_p) 720 { 721 unsigned int j; 722 bitmap_iterator bj; 723 same_succ *same = *same_p; 724 same_succ **slot; 725 edge_iterator ei; 726 edge e; 727 728 if (bb == NULL) 729 return; 730 bitmap_set_bit (same->bbs, bb->index); 731 FOR_EACH_EDGE (e, ei, bb->succs) 732 { 733 int index = e->dest->index; 734 bitmap_set_bit (same->succs, index); 735 same_succ_edge_flags[index] = (e->flags & ~ignore_edge_flags); 736 } 737 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) 738 same->succ_flags.safe_push (same_succ_edge_flags[j]); 739 740 same->hashval = same_succ_hash (same); 741 742 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT); 743 if (*slot == NULL) 744 { 745 *slot = same; 746 BB_SAME_SUCC (bb) = same; 747 add_to_worklist (same); 748 *same_p = NULL; 749 } 750 else 751 { 752 bitmap_set_bit ((*slot)->bbs, bb->index); 753 BB_SAME_SUCC (bb) = *slot; 754 add_to_worklist (*slot); 755 if (inverse_flags (same, *slot)) 756 bitmap_set_bit ((*slot)->inverse, bb->index); 757 same_succ_reset (same); 758 } 759 } 760 761 /* Find bbs with same successors. */ 762 763 static void 764 find_same_succ (void) 765 { 766 same_succ *same = same_succ_alloc (); 767 basic_block bb; 768 769 FOR_EACH_BB_FN (bb, cfun) 770 { 771 find_same_succ_bb (bb, &same); 772 if (same == NULL) 773 same = same_succ_alloc (); 774 } 775 776 same_succ::remove (same); 777 } 778 779 /* Initializes worklist administration. */ 780 781 static void 782 init_worklist (void) 783 { 784 alloc_aux_for_blocks (sizeof (struct aux_bb_info)); 785 same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun)); 786 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun)); 787 deleted_bbs = BITMAP_ALLOC (NULL); 788 deleted_bb_preds = BITMAP_ALLOC (NULL); 789 worklist.create (n_basic_blocks_for_fn (cfun)); 790 find_same_succ (); 791 792 if (dump_file && (dump_flags & TDF_DETAILS)) 793 { 794 fprintf (dump_file, "initial worklist:\n"); 795 print_worklist (dump_file); 796 } 797 } 798 799 /* Deletes worklist administration. */ 800 801 static void 802 delete_worklist (void) 803 { 804 free_aux_for_blocks (); 805 delete same_succ_htab; 806 same_succ_htab = NULL; 807 XDELETEVEC (same_succ_edge_flags); 808 same_succ_edge_flags = NULL; 809 BITMAP_FREE (deleted_bbs); 810 BITMAP_FREE (deleted_bb_preds); 811 worklist.release (); 812 } 813 814 /* Mark BB as deleted, and mark its predecessors. */ 815 816 static void 817 mark_basic_block_deleted (basic_block bb) 818 { 819 edge e; 820 edge_iterator ei; 821 822 bitmap_set_bit (deleted_bbs, bb->index); 823 824 FOR_EACH_EDGE (e, ei, bb->preds) 825 bitmap_set_bit (deleted_bb_preds, e->src->index); 826 } 827 828 /* Removes BB from its corresponding same_succ. */ 829 830 static void 831 same_succ_flush_bb (basic_block bb) 832 { 833 same_succ *same = BB_SAME_SUCC (bb); 834 if (! same) 835 return; 836 837 BB_SAME_SUCC (bb) = NULL; 838 if (bitmap_single_bit_set_p (same->bbs)) 839 same_succ_htab->remove_elt_with_hash (same, same->hashval); 840 else 841 bitmap_clear_bit (same->bbs, bb->index); 842 } 843 844 /* Removes all bbs in BBS from their corresponding same_succ. */ 845 846 static void 847 same_succ_flush_bbs (bitmap bbs) 848 { 849 unsigned int i; 850 bitmap_iterator bi; 851 852 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi) 853 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i)); 854 } 855 856 /* Release the last vdef in BB, either normal or phi result. */ 857 858 static void 859 release_last_vdef (basic_block bb) 860 { 861 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i); 862 gsi_prev_nondebug (&i)) 863 { 864 gimple *stmt = gsi_stmt (i); 865 if (gimple_vdef (stmt) == NULL_TREE) 866 continue; 867 868 mark_virtual_operand_for_renaming (gimple_vdef (stmt)); 869 return; 870 } 871 872 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i); 873 gsi_next (&i)) 874 { 875 gphi *phi = i.phi (); 876 tree res = gimple_phi_result (phi); 877 878 if (!virtual_operand_p (res)) 879 continue; 880 881 mark_virtual_phi_result_for_renaming (phi); 882 return; 883 } 884 } 885 886 /* For deleted_bb_preds, find bbs with same successors. */ 887 888 static void 889 update_worklist (void) 890 { 891 unsigned int i; 892 bitmap_iterator bi; 893 basic_block bb; 894 same_succ *same; 895 896 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); 897 bitmap_clear (deleted_bbs); 898 899 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK); 900 same_succ_flush_bbs (deleted_bb_preds); 901 902 same = same_succ_alloc (); 903 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) 904 { 905 bb = BASIC_BLOCK_FOR_FN (cfun, i); 906 gcc_assert (bb != NULL); 907 find_same_succ_bb (bb, &same); 908 if (same == NULL) 909 same = same_succ_alloc (); 910 } 911 same_succ::remove (same); 912 bitmap_clear (deleted_bb_preds); 913 } 914 915 /* Prints cluster C to FILE. */ 916 917 static void 918 print_cluster (FILE *file, bb_cluster *c) 919 { 920 if (c == NULL) 921 return; 922 bitmap_print (file, c->bbs, "bbs:", "\n"); 923 bitmap_print (file, c->preds, "preds:", "\n"); 924 } 925 926 /* Prints cluster C to stderr. */ 927 928 extern void debug_cluster (bb_cluster *); 929 DEBUG_FUNCTION void 930 debug_cluster (bb_cluster *c) 931 { 932 print_cluster (stderr, c); 933 } 934 935 /* Update C->rep_bb, given that BB is added to the cluster. */ 936 937 static void 938 update_rep_bb (bb_cluster *c, basic_block bb) 939 { 940 /* Initial. */ 941 if (c->rep_bb == NULL) 942 { 943 c->rep_bb = bb; 944 return; 945 } 946 947 /* Current needs no deps, keep it. */ 948 if (BB_DEP_BB (c->rep_bb) == NULL) 949 return; 950 951 /* Bb needs no deps, change rep_bb. */ 952 if (BB_DEP_BB (bb) == NULL) 953 { 954 c->rep_bb = bb; 955 return; 956 } 957 958 /* Bb needs last deps earlier than current, change rep_bb. A potential 959 problem with this, is that the first deps might also be earlier, which 960 would mean we prefer longer lifetimes for the deps. To be able to check 961 for this, we would have to trace BB_FIRST_DEP_BB as well, besides 962 BB_DEP_BB, which is really BB_LAST_DEP_BB. 963 The benefit of choosing the bb with last deps earlier, is that it can 964 potentially be used as replacement for more bbs. */ 965 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb))) 966 c->rep_bb = bb; 967 } 968 969 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */ 970 971 static void 972 add_bb_to_cluster (bb_cluster *c, basic_block bb) 973 { 974 edge e; 975 edge_iterator ei; 976 977 bitmap_set_bit (c->bbs, bb->index); 978 979 FOR_EACH_EDGE (e, ei, bb->preds) 980 bitmap_set_bit (c->preds, e->src->index); 981 982 update_rep_bb (c, bb); 983 } 984 985 /* Allocate and init new cluster. */ 986 987 static bb_cluster * 988 new_cluster (void) 989 { 990 bb_cluster *c; 991 c = XCNEW (bb_cluster); 992 c->bbs = BITMAP_ALLOC (NULL); 993 c->preds = BITMAP_ALLOC (NULL); 994 c->rep_bb = NULL; 995 return c; 996 } 997 998 /* Delete clusters. */ 999 1000 static void 1001 delete_cluster (bb_cluster *c) 1002 { 1003 if (c == NULL) 1004 return; 1005 BITMAP_FREE (c->bbs); 1006 BITMAP_FREE (c->preds); 1007 XDELETE (c); 1008 } 1009 1010 1011 /* Array that contains all clusters. */ 1012 1013 static vec<bb_cluster *> all_clusters; 1014 1015 /* Allocate all cluster vectors. */ 1016 1017 static void 1018 alloc_cluster_vectors (void) 1019 { 1020 all_clusters.create (n_basic_blocks_for_fn (cfun)); 1021 } 1022 1023 /* Reset all cluster vectors. */ 1024 1025 static void 1026 reset_cluster_vectors (void) 1027 { 1028 unsigned int i; 1029 basic_block bb; 1030 for (i = 0; i < all_clusters.length (); ++i) 1031 delete_cluster (all_clusters[i]); 1032 all_clusters.truncate (0); 1033 FOR_EACH_BB_FN (bb, cfun) 1034 BB_CLUSTER (bb) = NULL; 1035 } 1036 1037 /* Delete all cluster vectors. */ 1038 1039 static void 1040 delete_cluster_vectors (void) 1041 { 1042 unsigned int i; 1043 for (i = 0; i < all_clusters.length (); ++i) 1044 delete_cluster (all_clusters[i]); 1045 all_clusters.release (); 1046 } 1047 1048 /* Merge cluster C2 into C1. */ 1049 1050 static void 1051 merge_clusters (bb_cluster *c1, bb_cluster *c2) 1052 { 1053 bitmap_ior_into (c1->bbs, c2->bbs); 1054 bitmap_ior_into (c1->preds, c2->preds); 1055 } 1056 1057 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in 1058 all_clusters, or merge c with existing cluster. */ 1059 1060 static void 1061 set_cluster (basic_block bb1, basic_block bb2) 1062 { 1063 basic_block merge_bb, other_bb; 1064 bb_cluster *merge, *old, *c; 1065 1066 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL) 1067 { 1068 c = new_cluster (); 1069 add_bb_to_cluster (c, bb1); 1070 add_bb_to_cluster (c, bb2); 1071 BB_CLUSTER (bb1) = c; 1072 BB_CLUSTER (bb2) = c; 1073 c->index = all_clusters.length (); 1074 all_clusters.safe_push (c); 1075 } 1076 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL) 1077 { 1078 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1; 1079 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2; 1080 merge = BB_CLUSTER (merge_bb); 1081 add_bb_to_cluster (merge, other_bb); 1082 BB_CLUSTER (other_bb) = merge; 1083 } 1084 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2)) 1085 { 1086 unsigned int i; 1087 bitmap_iterator bi; 1088 1089 old = BB_CLUSTER (bb2); 1090 merge = BB_CLUSTER (bb1); 1091 merge_clusters (merge, old); 1092 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi) 1093 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge; 1094 all_clusters[old->index] = NULL; 1095 update_rep_bb (merge, old->rep_bb); 1096 delete_cluster (old); 1097 } 1098 else 1099 gcc_unreachable (); 1100 } 1101 1102 /* Return true if gimple operands T1 and T2 have the same value. */ 1103 1104 static bool 1105 gimple_operand_equal_value_p (tree t1, tree t2) 1106 { 1107 if (t1 == t2) 1108 return true; 1109 1110 if (t1 == NULL_TREE 1111 || t2 == NULL_TREE) 1112 return false; 1113 1114 if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS)) 1115 return true; 1116 1117 return gvn_uses_equal (t1, t2); 1118 } 1119 1120 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and 1121 gimple_bb (s2) are members of SAME_SUCC. */ 1122 1123 static bool 1124 gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2) 1125 { 1126 unsigned int i; 1127 tree lhs1, lhs2; 1128 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1129 tree t1, t2; 1130 bool inv_cond; 1131 enum tree_code code1, code2; 1132 1133 if (gimple_code (s1) != gimple_code (s2)) 1134 return false; 1135 1136 switch (gimple_code (s1)) 1137 { 1138 case GIMPLE_CALL: 1139 if (!gimple_call_same_target_p (s1, s2)) 1140 return false; 1141 1142 t1 = gimple_call_chain (s1); 1143 t2 = gimple_call_chain (s2); 1144 if (!gimple_operand_equal_value_p (t1, t2)) 1145 return false; 1146 1147 if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) 1148 return false; 1149 1150 for (i = 0; i < gimple_call_num_args (s1); ++i) 1151 { 1152 t1 = gimple_call_arg (s1, i); 1153 t2 = gimple_call_arg (s2, i); 1154 if (!gimple_operand_equal_value_p (t1, t2)) 1155 return false; 1156 } 1157 1158 lhs1 = gimple_get_lhs (s1); 1159 lhs2 = gimple_get_lhs (s2); 1160 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE) 1161 return true; 1162 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE) 1163 return false; 1164 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME) 1165 return tail_merge_valueize (lhs1) == tail_merge_valueize (lhs2); 1166 return operand_equal_p (lhs1, lhs2, 0); 1167 1168 case GIMPLE_ASSIGN: 1169 lhs1 = gimple_get_lhs (s1); 1170 lhs2 = gimple_get_lhs (s2); 1171 if (TREE_CODE (lhs1) != SSA_NAME 1172 && TREE_CODE (lhs2) != SSA_NAME) 1173 return (operand_equal_p (lhs1, lhs2, 0) 1174 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), 1175 gimple_assign_rhs1 (s2))); 1176 else if (TREE_CODE (lhs1) == SSA_NAME 1177 && TREE_CODE (lhs2) == SSA_NAME) 1178 return operand_equal_p (gimple_assign_rhs1 (s1), 1179 gimple_assign_rhs1 (s2), 0); 1180 return false; 1181 1182 case GIMPLE_COND: 1183 t1 = gimple_cond_lhs (s1); 1184 t2 = gimple_cond_lhs (s2); 1185 if (!gimple_operand_equal_value_p (t1, t2)) 1186 return false; 1187 1188 t1 = gimple_cond_rhs (s1); 1189 t2 = gimple_cond_rhs (s2); 1190 if (!gimple_operand_equal_value_p (t1, t2)) 1191 return false; 1192 1193 code1 = gimple_expr_code (s1); 1194 code2 = gimple_expr_code (s2); 1195 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) 1196 != bitmap_bit_p (same_succ->inverse, bb2->index)); 1197 if (inv_cond) 1198 { 1199 bool honor_nans = HONOR_NANS (t1); 1200 code2 = invert_tree_comparison (code2, honor_nans); 1201 } 1202 return code1 == code2; 1203 1204 default: 1205 return false; 1206 } 1207 } 1208 1209 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. 1210 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the 1211 processed statements. */ 1212 1213 static void 1214 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, 1215 bool *vuse_escaped) 1216 { 1217 gimple *stmt; 1218 tree lvuse; 1219 1220 while (true) 1221 { 1222 if (gsi_end_p (*gsi)) 1223 return; 1224 stmt = gsi_stmt (*gsi); 1225 1226 lvuse = gimple_vuse (stmt); 1227 if (lvuse != NULL_TREE) 1228 { 1229 *vuse = lvuse; 1230 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) 1231 *vuse_escaped = true; 1232 } 1233 1234 if (!stmt_local_def (stmt)) 1235 return; 1236 gsi_prev_nondebug (gsi); 1237 } 1238 } 1239 1240 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and 1241 STMT2 are allowed to be merged. */ 1242 1243 static bool 1244 merge_stmts_p (gimple *stmt1, gimple *stmt2) 1245 { 1246 /* What could be better than this here is to blacklist the bb 1247 containing the stmt, when encountering the stmt f.i. in 1248 same_succ_hash. */ 1249 if (is_tm_ending (stmt1)) 1250 return false; 1251 1252 /* Verify EH landing pads. */ 1253 if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2)) 1254 return false; 1255 1256 if (is_gimple_call (stmt1) 1257 && gimple_call_internal_p (stmt1)) 1258 switch (gimple_call_internal_fn (stmt1)) 1259 { 1260 case IFN_UBSAN_NULL: 1261 case IFN_UBSAN_BOUNDS: 1262 case IFN_UBSAN_VPTR: 1263 case IFN_UBSAN_CHECK_ADD: 1264 case IFN_UBSAN_CHECK_SUB: 1265 case IFN_UBSAN_CHECK_MUL: 1266 case IFN_UBSAN_OBJECT_SIZE: 1267 case IFN_UBSAN_PTR: 1268 case IFN_ASAN_CHECK: 1269 /* For these internal functions, gimple_location is an implicit 1270 parameter, which will be used explicitly after expansion. 1271 Merging these statements may cause confusing line numbers in 1272 sanitizer messages. */ 1273 return gimple_location (stmt1) == gimple_location (stmt2); 1274 default: 1275 break; 1276 } 1277 1278 return true; 1279 } 1280 1281 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so, 1282 clusters them. */ 1283 1284 static void 1285 find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2) 1286 { 1287 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); 1288 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); 1289 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; 1290 bool vuse_escaped = false; 1291 1292 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1293 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1294 1295 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) 1296 { 1297 gimple *stmt1 = gsi_stmt (gsi1); 1298 gimple *stmt2 = gsi_stmt (gsi2); 1299 1300 if (gimple_code (stmt1) == GIMPLE_LABEL 1301 && gimple_code (stmt2) == GIMPLE_LABEL) 1302 break; 1303 1304 if (!gimple_equal_p (same_succ, stmt1, stmt2)) 1305 return; 1306 1307 if (!merge_stmts_p (stmt1, stmt2)) 1308 return; 1309 1310 gsi_prev_nondebug (&gsi1); 1311 gsi_prev_nondebug (&gsi2); 1312 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1313 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1314 } 1315 1316 while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL) 1317 { 1318 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1))); 1319 if (DECL_NONLOCAL (label) || FORCED_LABEL (label)) 1320 return; 1321 gsi_prev (&gsi1); 1322 } 1323 while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL) 1324 { 1325 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2))); 1326 if (DECL_NONLOCAL (label) || FORCED_LABEL (label)) 1327 return; 1328 gsi_prev (&gsi2); 1329 } 1330 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) 1331 return; 1332 1333 /* If the incoming vuses are not the same, and the vuse escaped into an 1334 SSA_OP_DEF, then merging the 2 blocks will change the value of the def, 1335 which potentially means the semantics of one of the blocks will be changed. 1336 TODO: make this check more precise. */ 1337 if (vuse_escaped && vuse1 != vuse2) 1338 return; 1339 1340 if (dump_file) 1341 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", 1342 bb1->index, bb2->index); 1343 1344 set_cluster (bb1, bb2); 1345 } 1346 1347 /* Returns whether for all phis in DEST the phi alternatives for E1 and 1348 E2 are equal. */ 1349 1350 static bool 1351 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2) 1352 { 1353 int n1 = e1->dest_idx, n2 = e2->dest_idx; 1354 gphi_iterator gsi; 1355 1356 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) 1357 { 1358 gphi *phi = gsi.phi (); 1359 tree lhs = gimple_phi_result (phi); 1360 tree val1 = gimple_phi_arg_def (phi, n1); 1361 tree val2 = gimple_phi_arg_def (phi, n2); 1362 1363 if (virtual_operand_p (lhs)) 1364 continue; 1365 1366 if (operand_equal_for_phi_arg_p (val1, val2)) 1367 continue; 1368 if (gvn_uses_equal (val1, val2)) 1369 continue; 1370 1371 return false; 1372 } 1373 1374 return true; 1375 } 1376 1377 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the 1378 phi alternatives for BB1 and BB2 are equal. */ 1379 1380 static bool 1381 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2) 1382 { 1383 unsigned int s; 1384 bitmap_iterator bs; 1385 edge e1, e2; 1386 basic_block succ; 1387 1388 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) 1389 { 1390 succ = BASIC_BLOCK_FOR_FN (cfun, s); 1391 e1 = find_edge (bb1, succ); 1392 e2 = find_edge (bb2, succ); 1393 if (e1->flags & EDGE_COMPLEX 1394 || e2->flags & EDGE_COMPLEX) 1395 return false; 1396 1397 /* For all phis in bb, the phi alternatives for e1 and e2 need to have 1398 the same value. */ 1399 if (!same_phi_alternatives_1 (succ, e1, e2)) 1400 return false; 1401 } 1402 1403 return true; 1404 } 1405 1406 /* Return true if BB has non-vop phis. */ 1407 1408 static bool 1409 bb_has_non_vop_phi (basic_block bb) 1410 { 1411 gimple_seq phis = phi_nodes (bb); 1412 gimple *phi; 1413 1414 if (phis == NULL) 1415 return false; 1416 1417 if (!gimple_seq_singleton_p (phis)) 1418 return true; 1419 1420 phi = gimple_seq_first_stmt (phis); 1421 return !virtual_operand_p (gimple_phi_result (phi)); 1422 } 1423 1424 /* Returns true if redirecting the incoming edges of FROM to TO maintains the 1425 invariant that uses in FROM are dominates by their defs. */ 1426 1427 static bool 1428 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to) 1429 { 1430 basic_block cd, dep_bb = BB_DEP_BB (to); 1431 edge_iterator ei; 1432 edge e; 1433 1434 if (dep_bb == NULL) 1435 return true; 1436 1437 bitmap from_preds = BITMAP_ALLOC (NULL); 1438 FOR_EACH_EDGE (e, ei, from->preds) 1439 bitmap_set_bit (from_preds, e->src->index); 1440 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds); 1441 BITMAP_FREE (from_preds); 1442 1443 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd); 1444 } 1445 1446 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its 1447 replacement bb) and vice versa maintains the invariant that uses in the 1448 replacement are dominates by their defs. */ 1449 1450 static bool 1451 deps_ok_for_redirect (basic_block bb1, basic_block bb2) 1452 { 1453 if (BB_CLUSTER (bb1) != NULL) 1454 bb1 = BB_CLUSTER (bb1)->rep_bb; 1455 1456 if (BB_CLUSTER (bb2) != NULL) 1457 bb2 = BB_CLUSTER (bb2)->rep_bb; 1458 1459 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2) 1460 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1)); 1461 } 1462 1463 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */ 1464 1465 static void 1466 find_clusters_1 (same_succ *same_succ) 1467 { 1468 basic_block bb1, bb2; 1469 unsigned int i, j; 1470 bitmap_iterator bi, bj; 1471 int nr_comparisons; 1472 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS); 1473 1474 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) 1475 { 1476 bb1 = BASIC_BLOCK_FOR_FN (cfun, i); 1477 1478 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding 1479 phi-nodes in bb1 and bb2, with the same alternatives for the same 1480 preds. */ 1481 if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1) 1482 || bb_has_abnormal_pred (bb1)) 1483 continue; 1484 1485 nr_comparisons = 0; 1486 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) 1487 { 1488 bb2 = BASIC_BLOCK_FOR_FN (cfun, j); 1489 1490 if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2) 1491 || bb_has_abnormal_pred (bb2)) 1492 continue; 1493 1494 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2)) 1495 continue; 1496 1497 /* Limit quadratic behavior. */ 1498 nr_comparisons++; 1499 if (nr_comparisons > max_comparisons) 1500 break; 1501 1502 /* This is a conservative dependency check. We could test more 1503 precise for allowed replacement direction. */ 1504 if (!deps_ok_for_redirect (bb1, bb2)) 1505 continue; 1506 1507 if (!(same_phi_alternatives (same_succ, bb1, bb2))) 1508 continue; 1509 1510 find_duplicate (same_succ, bb1, bb2); 1511 } 1512 } 1513 } 1514 1515 /* Find clusters of bbs which can be merged. */ 1516 1517 static void 1518 find_clusters (void) 1519 { 1520 same_succ *same; 1521 1522 while (!worklist.is_empty ()) 1523 { 1524 same = worklist.pop (); 1525 same->in_worklist = false; 1526 if (dump_file && (dump_flags & TDF_DETAILS)) 1527 { 1528 fprintf (dump_file, "processing worklist entry\n"); 1529 same_succ_print (dump_file, same); 1530 } 1531 find_clusters_1 (same); 1532 } 1533 } 1534 1535 /* Returns the vop phi of BB, if any. */ 1536 1537 static gphi * 1538 vop_phi (basic_block bb) 1539 { 1540 gphi *stmt; 1541 gphi_iterator gsi; 1542 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1543 { 1544 stmt = gsi.phi (); 1545 if (! virtual_operand_p (gimple_phi_result (stmt))) 1546 continue; 1547 return stmt; 1548 } 1549 return NULL; 1550 } 1551 1552 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */ 1553 1554 static void 1555 replace_block_by (basic_block bb1, basic_block bb2) 1556 { 1557 edge pred_edge; 1558 unsigned int i; 1559 gphi *bb2_phi; 1560 1561 bb2_phi = vop_phi (bb2); 1562 1563 /* Mark the basic block as deleted. */ 1564 mark_basic_block_deleted (bb1); 1565 1566 /* Redirect the incoming edges of bb1 to bb2. */ 1567 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i) 1568 { 1569 pred_edge = EDGE_PRED (bb1, i - 1); 1570 pred_edge = redirect_edge_and_branch (pred_edge, bb2); 1571 gcc_assert (pred_edge != NULL); 1572 1573 if (bb2_phi == NULL) 1574 continue; 1575 1576 /* The phi might have run out of capacity when the redirect added an 1577 argument, which means it could have been replaced. Refresh it. */ 1578 bb2_phi = vop_phi (bb2); 1579 1580 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)), 1581 pred_edge, UNKNOWN_LOCATION); 1582 } 1583 1584 1585 /* Merge the outgoing edge counts from bb1 onto bb2. */ 1586 edge e1, e2; 1587 edge_iterator ei; 1588 1589 if (bb2->count.initialized_p ()) 1590 FOR_EACH_EDGE (e1, ei, bb1->succs) 1591 { 1592 e2 = find_edge (bb2, e1->dest); 1593 gcc_assert (e2); 1594 1595 /* If probabilities are same, we are done. 1596 If counts are nonzero we can distribute accordingly. In remaining 1597 cases just avreage the values and hope for the best. */ 1598 e2->probability = e1->probability.combine_with_count 1599 (bb1->count, e2->probability, bb2->count); 1600 } 1601 bb2->count += bb1->count; 1602 1603 /* Move over any user labels from bb1 after the bb2 labels. */ 1604 gimple_stmt_iterator gsi1 = gsi_start_bb (bb1); 1605 if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL) 1606 { 1607 gimple_stmt_iterator gsi2 = gsi_after_labels (bb2); 1608 while (!gsi_end_p (gsi1) 1609 && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL) 1610 { 1611 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1))); 1612 gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label)); 1613 if (DECL_ARTIFICIAL (label)) 1614 gsi_next (&gsi1); 1615 else 1616 gsi_move_before (&gsi1, &gsi2); 1617 } 1618 } 1619 1620 /* Clear range info from all stmts in BB2 -- this transformation 1621 could make them out of date. */ 1622 reset_flow_sensitive_info_in_bb (bb2); 1623 1624 /* Do updates that use bb1, before deleting bb1. */ 1625 release_last_vdef (bb1); 1626 same_succ_flush_bb (bb1); 1627 1628 delete_basic_block (bb1); 1629 } 1630 1631 /* Bbs for which update_debug_stmt need to be called. */ 1632 1633 static bitmap update_bbs; 1634 1635 /* For each cluster in all_clusters, merge all cluster->bbs. Returns 1636 number of bbs removed. */ 1637 1638 static int 1639 apply_clusters (void) 1640 { 1641 basic_block bb1, bb2; 1642 bb_cluster *c; 1643 unsigned int i, j; 1644 bitmap_iterator bj; 1645 int nr_bbs_removed = 0; 1646 1647 for (i = 0; i < all_clusters.length (); ++i) 1648 { 1649 c = all_clusters[i]; 1650 if (c == NULL) 1651 continue; 1652 1653 bb2 = c->rep_bb; 1654 bitmap_set_bit (update_bbs, bb2->index); 1655 1656 bitmap_clear_bit (c->bbs, bb2->index); 1657 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj) 1658 { 1659 bb1 = BASIC_BLOCK_FOR_FN (cfun, j); 1660 bitmap_clear_bit (update_bbs, bb1->index); 1661 1662 replace_block_by (bb1, bb2); 1663 nr_bbs_removed++; 1664 } 1665 } 1666 1667 return nr_bbs_removed; 1668 } 1669 1670 /* Resets debug statement STMT if it has uses that are not dominated by their 1671 defs. */ 1672 1673 static void 1674 update_debug_stmt (gimple *stmt) 1675 { 1676 use_operand_p use_p; 1677 ssa_op_iter oi; 1678 basic_block bbuse; 1679 1680 if (!gimple_debug_bind_p (stmt)) 1681 return; 1682 1683 bbuse = gimple_bb (stmt); 1684 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE) 1685 { 1686 tree name = USE_FROM_PTR (use_p); 1687 gimple *def_stmt = SSA_NAME_DEF_STMT (name); 1688 basic_block bbdef = gimple_bb (def_stmt); 1689 if (bbdef == NULL || bbuse == bbdef 1690 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)) 1691 continue; 1692 1693 gimple_debug_bind_reset_value (stmt); 1694 update_stmt (stmt); 1695 break; 1696 } 1697 } 1698 1699 /* Resets all debug statements that have uses that are not 1700 dominated by their defs. */ 1701 1702 static void 1703 update_debug_stmts (void) 1704 { 1705 basic_block bb; 1706 bitmap_iterator bi; 1707 unsigned int i; 1708 1709 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi) 1710 { 1711 gimple *stmt; 1712 gimple_stmt_iterator gsi; 1713 1714 bb = BASIC_BLOCK_FOR_FN (cfun, i); 1715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1716 { 1717 stmt = gsi_stmt (gsi); 1718 if (!is_gimple_debug (stmt)) 1719 continue; 1720 update_debug_stmt (stmt); 1721 } 1722 } 1723 } 1724 1725 /* Runs tail merge optimization. */ 1726 1727 unsigned int 1728 tail_merge_optimize (unsigned int todo) 1729 { 1730 int nr_bbs_removed_total = 0; 1731 int nr_bbs_removed; 1732 bool loop_entered = false; 1733 int iteration_nr = 0; 1734 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS); 1735 1736 if (!flag_tree_tail_merge 1737 || max_iterations == 0) 1738 return 0; 1739 1740 timevar_push (TV_TREE_TAIL_MERGE); 1741 1742 /* We enter from PRE which has critical edges split. Elimination 1743 does not process trivially dead code so cleanup the CFG if we 1744 are told so. And re-split critical edges then. */ 1745 if (todo & TODO_cleanup_cfg) 1746 { 1747 cleanup_tree_cfg (); 1748 todo &= ~TODO_cleanup_cfg; 1749 split_critical_edges (); 1750 } 1751 1752 if (!dom_info_available_p (CDI_DOMINATORS)) 1753 { 1754 /* PRE can leave us with unreachable blocks, remove them now. */ 1755 delete_unreachable_blocks (); 1756 calculate_dominance_info (CDI_DOMINATORS); 1757 } 1758 init_worklist (); 1759 1760 while (!worklist.is_empty ()) 1761 { 1762 if (!loop_entered) 1763 { 1764 loop_entered = true; 1765 alloc_cluster_vectors (); 1766 update_bbs = BITMAP_ALLOC (NULL); 1767 } 1768 else 1769 reset_cluster_vectors (); 1770 1771 iteration_nr++; 1772 if (dump_file && (dump_flags & TDF_DETAILS)) 1773 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr); 1774 1775 find_clusters (); 1776 gcc_assert (worklist.is_empty ()); 1777 if (all_clusters.is_empty ()) 1778 break; 1779 1780 nr_bbs_removed = apply_clusters (); 1781 nr_bbs_removed_total += nr_bbs_removed; 1782 if (nr_bbs_removed == 0) 1783 break; 1784 1785 free_dominance_info (CDI_DOMINATORS); 1786 1787 if (iteration_nr == max_iterations) 1788 break; 1789 1790 calculate_dominance_info (CDI_DOMINATORS); 1791 update_worklist (); 1792 } 1793 1794 if (dump_file && (dump_flags & TDF_DETAILS)) 1795 fprintf (dump_file, "htab collision / search: %f\n", 1796 same_succ_htab->collisions ()); 1797 1798 if (nr_bbs_removed_total > 0) 1799 { 1800 if (MAY_HAVE_DEBUG_BIND_STMTS) 1801 { 1802 calculate_dominance_info (CDI_DOMINATORS); 1803 update_debug_stmts (); 1804 } 1805 1806 if (dump_file && (dump_flags & TDF_DETAILS)) 1807 { 1808 fprintf (dump_file, "Before TODOs.\n"); 1809 dump_function_to_file (current_function_decl, dump_file, dump_flags); 1810 } 1811 1812 mark_virtual_operands_for_renaming (cfun); 1813 } 1814 1815 delete_worklist (); 1816 if (loop_entered) 1817 { 1818 delete_cluster_vectors (); 1819 BITMAP_FREE (update_bbs); 1820 } 1821 1822 timevar_pop (TV_TREE_TAIL_MERGE); 1823 1824 return todo; 1825 } 1826