1 /* Tail merging for gimple. 2 Copyright (C) 2011, 2012 Free Software Foundation, Inc. 3 Contributed by Tom de Vries (tom@codesourcery.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 /* Pass overview. 22 23 24 MOTIVATIONAL EXAMPLE 25 26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at 27 28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601) 29 { 30 struct FILED.1638 * fpD.2605; 31 charD.1 fileNameD.2604[1000]; 32 intD.0 D.3915; 33 const charD.1 * restrict outputFileName.0D.3914; 34 35 # BLOCK 2 freq:10000 36 # PRED: ENTRY [100.0%] (fallthru,exec) 37 # PT = nonlocal { D.3926 } (restr) 38 outputFileName.0D.3914_3 39 = (const charD.1 * restrict) outputFileNameD.2600_2(D); 40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)> 41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3); 44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13> 45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1); 48 if (D.3915_4 == 0) 49 goto <bb 3>; 50 else 51 goto <bb 4>; 52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec) 53 54 # BLOCK 3 freq:1000 55 # PRED: 2 [10.0%] (true,exec) 56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14> 57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 59 freeD.898 (ctxD.2601_5(D)); 60 goto <bb 7>; 61 # SUCC: 7 [100.0%] (fallthru,exec) 62 63 # BLOCK 4 freq:9000 64 # PRED: 2 [90.0%] (false,exec) 65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14> 66 # PT = nonlocal escaped 67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B); 70 if (fpD.2605_8 == 0B) 71 goto <bb 5>; 72 else 73 goto <bb 6>; 74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec) 75 76 # BLOCK 5 freq:173 77 # PRED: 4 [1.9%] (true,exec) 78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16> 79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 81 freeD.898 (ctxD.2601_5(D)); 82 goto <bb 7>; 83 # SUCC: 7 [100.0%] (fallthru,exec) 84 85 # BLOCK 6 freq:8827 86 # PRED: 4 [98.1%] (false,exec) 87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16> 88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr) 89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr) 90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8); 91 # SUCC: 7 [100.0%] (fallthru,exec) 92 93 # BLOCK 7 freq:10000 94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec) 95 6 [100.0%] (fallthru,exec) 96 # PT = nonlocal null 97 98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)> 99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5), 100 .MEMD.3923_18(6)> 101 # VUSE <.MEMD.3923_11> 102 return ctxD.2601_1; 103 # SUCC: EXIT [100.0%] 104 } 105 106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the 107 same successors, and the same operations. 108 109 110 CONTEXT 111 112 A technique called tail merging (or cross jumping) can fix the example 113 above. For a block, we look for common code at the end (the tail) of the 114 predecessor blocks, and insert jumps from one block to the other. 115 The example is a special case for tail merging, in that 2 whole blocks 116 can be merged, rather than just the end parts of it. 117 We currently only focus on whole block merging, so in that sense 118 calling this pass tail merge is a bit of a misnomer. 119 120 We distinguish 2 kinds of situations in which blocks can be merged: 121 - same operations, same predecessors. The successor edges coming from one 122 block are redirected to come from the other block. 123 - same operations, same successors. The predecessor edges entering one block 124 are redirected to enter the other block. Note that this operation might 125 involve introducing phi operations. 126 127 For efficient implementation, we would like to value numbers the blocks, and 128 have a comparison operator that tells us whether the blocks are equal. 129 Besides being runtime efficient, block value numbering should also abstract 130 from irrelevant differences in order of operations, much like normal value 131 numbering abstracts from irrelevant order of operations. 132 133 For the first situation (same_operations, same predecessors), normal value 134 numbering fits well. We can calculate a block value number based on the 135 value numbers of the defs and vdefs. 136 137 For the second situation (same operations, same successors), this approach 138 doesn't work so well. We can illustrate this using the example. The calls 139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will 140 remain different in value numbering, since they represent different memory 141 states. So the resulting vdefs of the frees will be different in value 142 numbering, so the block value numbers will be different. 143 144 The reason why we call the blocks equal is not because they define the same 145 values, but because uses in the blocks use (possibly different) defs in the 146 same way. To be able to detect this efficiently, we need to do some kind of 147 reverse value numbering, meaning number the uses rather than the defs, and 148 calculate a block value number based on the value number of the uses. 149 Ideally, a block comparison operator will also indicate which phis are needed 150 to merge the blocks. 151 152 For the moment, we don't do block value numbering, but we do insn-by-insn 153 matching, using scc value numbers to match operations with results, and 154 structural comparison otherwise, while ignoring vop mismatches. 155 156 157 IMPLEMENTATION 158 159 1. The pass first determines all groups of blocks with the same successor 160 blocks. 161 2. Within each group, it tries to determine clusters of equal basic blocks. 162 3. The clusters are applied. 163 4. The same successor groups are updated. 164 5. This process is repeated from 2 onwards, until no more changes. 165 166 167 LIMITATIONS/TODO 168 169 - block only 170 - handles only 'same operations, same successors'. 171 It handles same predecessors as a special subcase though. 172 - does not implement the reverse value numbering and block value numbering. 173 - improve memory allocation: use garbage collected memory, obstacks, 174 allocpools where appropriate. 175 - no insertion of gimple_reg phis, We only introduce vop-phis. 176 - handle blocks with gimple_reg phi_nodes. 177 178 179 SWITCHES 180 181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */ 182 183 #include "config.h" 184 #include "system.h" 185 #include "coretypes.h" 186 #include "tm.h" 187 #include "tree.h" 188 #include "tm_p.h" 189 #include "basic-block.h" 190 #include "output.h" 191 #include "flags.h" 192 #include "function.h" 193 #include "tree-flow.h" 194 #include "timevar.h" 195 #include "bitmap.h" 196 #include "tree-ssa-alias.h" 197 #include "params.h" 198 #include "tree-pretty-print.h" 199 #include "hashtab.h" 200 #include "gimple-pretty-print.h" 201 #include "tree-ssa-sccvn.h" 202 #include "tree-dump.h" 203 204 /* Describes a group of bbs with the same successors. The successor bbs are 205 cached in succs, and the successor edge flags are cached in succ_flags. 206 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags, 207 it's marked in inverse. 208 Additionally, the hash value for the struct is cached in hashval, and 209 in_worklist indicates whether it's currently part of worklist. */ 210 211 struct same_succ_def 212 { 213 /* The bbs that have the same successor bbs. */ 214 bitmap bbs; 215 /* The successor bbs. */ 216 bitmap succs; 217 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for 218 bb. */ 219 bitmap inverse; 220 /* The edge flags for each of the successor bbs. */ 221 VEC (int, heap) *succ_flags; 222 /* Indicates whether the struct is currently in the worklist. */ 223 bool in_worklist; 224 /* The hash value of the struct. */ 225 hashval_t hashval; 226 }; 227 typedef struct same_succ_def *same_succ; 228 typedef const struct same_succ_def *const_same_succ; 229 230 /* A group of bbs where 1 bb from bbs can replace the other bbs. */ 231 232 struct bb_cluster_def 233 { 234 /* The bbs in the cluster. */ 235 bitmap bbs; 236 /* The preds of the bbs in the cluster. */ 237 bitmap preds; 238 /* Index in all_clusters vector. */ 239 int index; 240 /* The bb to replace the cluster with. */ 241 basic_block rep_bb; 242 }; 243 typedef struct bb_cluster_def *bb_cluster; 244 typedef const struct bb_cluster_def *const_bb_cluster; 245 246 /* Per bb-info. */ 247 248 struct aux_bb_info 249 { 250 /* The number of non-debug statements in the bb. */ 251 int size; 252 /* The same_succ that this bb is a member of. */ 253 same_succ bb_same_succ; 254 /* The cluster that this bb is a member of. */ 255 bb_cluster cluster; 256 /* The vop state at the exit of a bb. This is shortlived data, used to 257 communicate data between update_block_by and update_vuses. */ 258 tree vop_at_exit; 259 /* The bb that either contains or is dominated by the dependencies of the 260 bb. */ 261 basic_block dep_bb; 262 }; 263 264 /* Macros to access the fields of struct aux_bb_info. */ 265 266 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size) 267 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ) 268 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster) 269 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit) 270 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb) 271 272 /* VAL1 and VAL2 are either: 273 - uses in BB1 and BB2, or 274 - phi alternatives for BB1 and BB2. 275 Return true if the uses have the same gvn value. */ 276 277 static bool 278 gvn_uses_equal (tree val1, tree val2) 279 { 280 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE); 281 282 if (val1 == val2) 283 return true; 284 285 if (vn_valueize (val1) != vn_valueize (val2)) 286 return false; 287 288 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1)) 289 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2))); 290 } 291 292 /* Prints E to FILE. */ 293 294 static void 295 same_succ_print (FILE *file, const same_succ e) 296 { 297 unsigned int i; 298 bitmap_print (file, e->bbs, "bbs:", "\n"); 299 bitmap_print (file, e->succs, "succs:", "\n"); 300 bitmap_print (file, e->inverse, "inverse:", "\n"); 301 fprintf (file, "flags:"); 302 for (i = 0; i < VEC_length (int, e->succ_flags); ++i) 303 fprintf (file, " %x", VEC_index (int, e->succ_flags, i)); 304 fprintf (file, "\n"); 305 } 306 307 /* Prints same_succ VE to VFILE. */ 308 309 static int 310 same_succ_print_traverse (void **ve, void *vfile) 311 { 312 const same_succ e = *((const same_succ *)ve); 313 FILE *file = ((FILE*)vfile); 314 same_succ_print (file, e); 315 return 1; 316 } 317 318 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */ 319 320 static void 321 update_dep_bb (basic_block use_bb, tree val) 322 { 323 basic_block dep_bb; 324 325 /* Not a dep. */ 326 if (TREE_CODE (val) != SSA_NAME) 327 return; 328 329 /* Skip use of global def. */ 330 if (SSA_NAME_IS_DEFAULT_DEF (val)) 331 return; 332 333 /* Skip use of local def. */ 334 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val)); 335 if (dep_bb == use_bb) 336 return; 337 338 if (BB_DEP_BB (use_bb) == NULL 339 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb))) 340 BB_DEP_BB (use_bb) = dep_bb; 341 } 342 343 /* Update BB_DEP_BB, given the dependencies in STMT. */ 344 345 static void 346 stmt_update_dep_bb (gimple stmt) 347 { 348 ssa_op_iter iter; 349 use_operand_p use; 350 351 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) 352 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use)); 353 } 354 355 /* Returns whether VAL is used in the same bb as in which it is defined, or 356 in the phi of a successor bb. */ 357 358 static bool 359 local_def (tree val) 360 { 361 gimple stmt, def_stmt; 362 basic_block bb, def_bb; 363 imm_use_iterator iter; 364 bool res; 365 366 if (TREE_CODE (val) != SSA_NAME) 367 return false; 368 def_stmt = SSA_NAME_DEF_STMT (val); 369 def_bb = gimple_bb (def_stmt); 370 371 res = true; 372 FOR_EACH_IMM_USE_STMT (stmt, iter, val) 373 { 374 if (is_gimple_debug (stmt)) 375 continue; 376 bb = gimple_bb (stmt); 377 if (bb == def_bb) 378 continue; 379 if (gimple_code (stmt) == GIMPLE_PHI 380 && find_edge (def_bb, bb)) 381 continue; 382 res = false; 383 BREAK_FROM_IMM_USE_STMT (iter); 384 } 385 return res; 386 } 387 388 /* Calculates hash value for same_succ VE. */ 389 390 static hashval_t 391 same_succ_hash (const void *ve) 392 { 393 const_same_succ e = (const_same_succ)ve; 394 hashval_t hashval = bitmap_hash (e->succs); 395 int flags; 396 unsigned int i; 397 unsigned int first = bitmap_first_set_bit (e->bbs); 398 basic_block bb = BASIC_BLOCK (first); 399 int size = 0; 400 gimple_stmt_iterator gsi; 401 gimple stmt; 402 tree arg; 403 unsigned int s; 404 bitmap_iterator bs; 405 406 for (gsi = gsi_start_nondebug_bb (bb); 407 !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) 408 { 409 stmt = gsi_stmt (gsi); 410 stmt_update_dep_bb (stmt); 411 if (is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) 412 && !gimple_has_side_effects (stmt)) 413 continue; 414 size++; 415 416 hashval = iterative_hash_hashval_t (gimple_code (stmt), hashval); 417 if (is_gimple_assign (stmt)) 418 hashval = iterative_hash_hashval_t (gimple_assign_rhs_code (stmt), 419 hashval); 420 if (!is_gimple_call (stmt)) 421 continue; 422 if (gimple_call_internal_p (stmt)) 423 hashval = iterative_hash_hashval_t 424 ((hashval_t) gimple_call_internal_fn (stmt), hashval); 425 else 426 hashval = iterative_hash_expr (gimple_call_fn (stmt), hashval); 427 for (i = 0; i < gimple_call_num_args (stmt); i++) 428 { 429 arg = gimple_call_arg (stmt, i); 430 arg = vn_valueize (arg); 431 hashval = iterative_hash_expr (arg, hashval); 432 } 433 } 434 435 hashval = iterative_hash_hashval_t (size, hashval); 436 BB_SIZE (bb) = size; 437 438 for (i = 0; i < VEC_length (int, e->succ_flags); ++i) 439 { 440 flags = VEC_index (int, e->succ_flags, i); 441 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 442 hashval = iterative_hash_hashval_t (flags, hashval); 443 } 444 445 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs) 446 { 447 int n = find_edge (bb, BASIC_BLOCK (s))->dest_idx; 448 for (gsi = gsi_start_phis (BASIC_BLOCK (s)); !gsi_end_p (gsi); 449 gsi_next (&gsi)) 450 { 451 gimple phi = gsi_stmt (gsi); 452 tree lhs = gimple_phi_result (phi); 453 tree val = gimple_phi_arg_def (phi, n); 454 455 if (!is_gimple_reg (lhs)) 456 continue; 457 update_dep_bb (bb, val); 458 } 459 } 460 461 return hashval; 462 } 463 464 /* Returns true if E1 and E2 have 2 successors, and if the successor flags 465 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for 466 the other edge flags. */ 467 468 static bool 469 inverse_flags (const_same_succ e1, const_same_succ e2) 470 { 471 int f1a, f1b, f2a, f2b; 472 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); 473 474 if (VEC_length (int, e1->succ_flags) != 2) 475 return false; 476 477 f1a = VEC_index (int, e1->succ_flags, 0); 478 f1b = VEC_index (int, e1->succ_flags, 1); 479 f2a = VEC_index (int, e2->succ_flags, 0); 480 f2b = VEC_index (int, e2->succ_flags, 1); 481 482 if (f1a == f2a && f1b == f2b) 483 return false; 484 485 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask); 486 } 487 488 /* Compares SAME_SUCCs VE1 and VE2. */ 489 490 static int 491 same_succ_equal (const void *ve1, const void *ve2) 492 { 493 const_same_succ e1 = (const_same_succ)ve1; 494 const_same_succ e2 = (const_same_succ)ve2; 495 unsigned int i, first1, first2; 496 gimple_stmt_iterator gsi1, gsi2; 497 gimple s1, s2; 498 basic_block bb1, bb2; 499 500 if (e1->hashval != e2->hashval) 501 return 0; 502 503 if (VEC_length (int, e1->succ_flags) != VEC_length (int, e2->succ_flags)) 504 return 0; 505 506 if (!bitmap_equal_p (e1->succs, e2->succs)) 507 return 0; 508 509 if (!inverse_flags (e1, e2)) 510 { 511 for (i = 0; i < VEC_length (int, e1->succ_flags); ++i) 512 if (VEC_index (int, e1->succ_flags, i) 513 != VEC_index (int, e1->succ_flags, i)) 514 return 0; 515 } 516 517 first1 = bitmap_first_set_bit (e1->bbs); 518 first2 = bitmap_first_set_bit (e2->bbs); 519 520 bb1 = BASIC_BLOCK (first1); 521 bb2 = BASIC_BLOCK (first2); 522 523 if (BB_SIZE (bb1) != BB_SIZE (bb2)) 524 return 0; 525 526 gsi1 = gsi_start_nondebug_bb (bb1); 527 gsi2 = gsi_start_nondebug_bb (bb2); 528 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2))) 529 { 530 s1 = gsi_stmt (gsi1); 531 s2 = gsi_stmt (gsi2); 532 if (gimple_code (s1) != gimple_code (s2)) 533 return 0; 534 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2)) 535 return 0; 536 gsi_next_nondebug (&gsi1); 537 gsi_next_nondebug (&gsi2); 538 } 539 540 return 1; 541 } 542 543 /* Alloc and init a new SAME_SUCC. */ 544 545 static same_succ 546 same_succ_alloc (void) 547 { 548 same_succ same = XNEW (struct same_succ_def); 549 550 same->bbs = BITMAP_ALLOC (NULL); 551 same->succs = BITMAP_ALLOC (NULL); 552 same->inverse = BITMAP_ALLOC (NULL); 553 same->succ_flags = VEC_alloc (int, heap, 10); 554 same->in_worklist = false; 555 556 return same; 557 } 558 559 /* Delete same_succ VE. */ 560 561 static void 562 same_succ_delete (void *ve) 563 { 564 same_succ e = (same_succ)ve; 565 566 BITMAP_FREE (e->bbs); 567 BITMAP_FREE (e->succs); 568 BITMAP_FREE (e->inverse); 569 VEC_free (int, heap, e->succ_flags); 570 571 XDELETE (ve); 572 } 573 574 /* Reset same_succ SAME. */ 575 576 static void 577 same_succ_reset (same_succ same) 578 { 579 bitmap_clear (same->bbs); 580 bitmap_clear (same->succs); 581 bitmap_clear (same->inverse); 582 VEC_truncate (int, same->succ_flags, 0); 583 } 584 585 /* Hash table with all same_succ entries. */ 586 587 static htab_t same_succ_htab; 588 589 /* Array that is used to store the edge flags for a successor. */ 590 591 static int *same_succ_edge_flags; 592 593 /* Bitmap that is used to mark bbs that are recently deleted. */ 594 595 static bitmap deleted_bbs; 596 597 /* Bitmap that is used to mark predecessors of bbs that are 598 deleted. */ 599 600 static bitmap deleted_bb_preds; 601 602 /* Prints same_succ_htab to stderr. */ 603 604 extern void debug_same_succ (void); 605 DEBUG_FUNCTION void 606 debug_same_succ ( void) 607 { 608 htab_traverse (same_succ_htab, same_succ_print_traverse, stderr); 609 } 610 611 DEF_VEC_P (same_succ); 612 DEF_VEC_ALLOC_P (same_succ, heap); 613 614 /* Vector of bbs to process. */ 615 616 static VEC (same_succ, heap) *worklist; 617 618 /* Prints worklist to FILE. */ 619 620 static void 621 print_worklist (FILE *file) 622 { 623 unsigned int i; 624 for (i = 0; i < VEC_length (same_succ, worklist); ++i) 625 same_succ_print (file, VEC_index (same_succ, worklist, i)); 626 } 627 628 /* Adds SAME to worklist. */ 629 630 static void 631 add_to_worklist (same_succ same) 632 { 633 if (same->in_worklist) 634 return; 635 636 if (bitmap_count_bits (same->bbs) < 2) 637 return; 638 639 same->in_worklist = true; 640 VEC_safe_push (same_succ, heap, worklist, same); 641 } 642 643 /* Add BB to same_succ_htab. */ 644 645 static void 646 find_same_succ_bb (basic_block bb, same_succ *same_p) 647 { 648 unsigned int j; 649 bitmap_iterator bj; 650 same_succ same = *same_p; 651 same_succ *slot; 652 edge_iterator ei; 653 edge e; 654 655 if (bb == NULL) 656 return; 657 bitmap_set_bit (same->bbs, bb->index); 658 FOR_EACH_EDGE (e, ei, bb->succs) 659 { 660 int index = e->dest->index; 661 bitmap_set_bit (same->succs, index); 662 same_succ_edge_flags[index] = e->flags; 663 } 664 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj) 665 VEC_safe_push (int, heap, same->succ_flags, same_succ_edge_flags[j]); 666 667 same->hashval = same_succ_hash (same); 668 669 slot = (same_succ *) htab_find_slot_with_hash (same_succ_htab, same, 670 same->hashval, INSERT); 671 if (*slot == NULL) 672 { 673 *slot = same; 674 BB_SAME_SUCC (bb) = same; 675 add_to_worklist (same); 676 *same_p = NULL; 677 } 678 else 679 { 680 bitmap_set_bit ((*slot)->bbs, bb->index); 681 BB_SAME_SUCC (bb) = *slot; 682 add_to_worklist (*slot); 683 if (inverse_flags (same, *slot)) 684 bitmap_set_bit ((*slot)->inverse, bb->index); 685 same_succ_reset (same); 686 } 687 } 688 689 /* Find bbs with same successors. */ 690 691 static void 692 find_same_succ (void) 693 { 694 same_succ same = same_succ_alloc (); 695 basic_block bb; 696 697 FOR_EACH_BB (bb) 698 { 699 find_same_succ_bb (bb, &same); 700 if (same == NULL) 701 same = same_succ_alloc (); 702 } 703 704 same_succ_delete (same); 705 } 706 707 /* Initializes worklist administration. */ 708 709 static void 710 init_worklist (void) 711 { 712 alloc_aux_for_blocks (sizeof (struct aux_bb_info)); 713 same_succ_htab 714 = htab_create (n_basic_blocks, same_succ_hash, same_succ_equal, 715 same_succ_delete); 716 same_succ_edge_flags = XCNEWVEC (int, last_basic_block); 717 deleted_bbs = BITMAP_ALLOC (NULL); 718 deleted_bb_preds = BITMAP_ALLOC (NULL); 719 worklist = VEC_alloc (same_succ, heap, n_basic_blocks); 720 find_same_succ (); 721 722 if (dump_file && (dump_flags & TDF_DETAILS)) 723 { 724 fprintf (dump_file, "initial worklist:\n"); 725 print_worklist (dump_file); 726 } 727 } 728 729 /* Deletes worklist administration. */ 730 731 static void 732 delete_worklist (void) 733 { 734 free_aux_for_blocks (); 735 htab_delete (same_succ_htab); 736 same_succ_htab = NULL; 737 XDELETEVEC (same_succ_edge_flags); 738 same_succ_edge_flags = NULL; 739 BITMAP_FREE (deleted_bbs); 740 BITMAP_FREE (deleted_bb_preds); 741 VEC_free (same_succ, heap, worklist); 742 } 743 744 /* Mark BB as deleted, and mark its predecessors. */ 745 746 static void 747 mark_basic_block_deleted (basic_block bb) 748 { 749 edge e; 750 edge_iterator ei; 751 752 bitmap_set_bit (deleted_bbs, bb->index); 753 754 FOR_EACH_EDGE (e, ei, bb->preds) 755 bitmap_set_bit (deleted_bb_preds, e->src->index); 756 } 757 758 /* Removes BB from its corresponding same_succ. */ 759 760 static void 761 same_succ_flush_bb (basic_block bb) 762 { 763 same_succ same = BB_SAME_SUCC (bb); 764 BB_SAME_SUCC (bb) = NULL; 765 if (bitmap_single_bit_set_p (same->bbs)) 766 htab_remove_elt_with_hash (same_succ_htab, same, same->hashval); 767 else 768 bitmap_clear_bit (same->bbs, bb->index); 769 } 770 771 /* Removes all bbs in BBS from their corresponding same_succ. */ 772 773 static void 774 same_succ_flush_bbs (bitmap bbs) 775 { 776 unsigned int i; 777 bitmap_iterator bi; 778 779 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi) 780 same_succ_flush_bb (BASIC_BLOCK (i)); 781 } 782 783 /* Release the last vdef in BB, either normal or phi result. */ 784 785 static void 786 release_last_vdef (basic_block bb) 787 { 788 gimple_stmt_iterator i; 789 790 for (i = gsi_last_bb (bb); !gsi_end_p (i); gsi_prev_nondebug (&i)) 791 { 792 gimple stmt = gsi_stmt (i); 793 if (gimple_vdef (stmt) == NULL_TREE) 794 continue; 795 796 mark_virtual_operand_for_renaming (gimple_vdef (stmt)); 797 return; 798 } 799 800 for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i)) 801 { 802 gimple phi = gsi_stmt (i); 803 tree res = gimple_phi_result (phi); 804 805 if (is_gimple_reg (res)) 806 continue; 807 808 mark_virtual_phi_result_for_renaming (phi); 809 return; 810 } 811 812 } 813 814 /* For deleted_bb_preds, find bbs with same successors. */ 815 816 static void 817 update_worklist (void) 818 { 819 unsigned int i; 820 bitmap_iterator bi; 821 basic_block bb; 822 same_succ same; 823 824 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs); 825 bitmap_clear (deleted_bbs); 826 827 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK); 828 same_succ_flush_bbs (deleted_bb_preds); 829 830 same = same_succ_alloc (); 831 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi) 832 { 833 bb = BASIC_BLOCK (i); 834 gcc_assert (bb != NULL); 835 find_same_succ_bb (bb, &same); 836 if (same == NULL) 837 same = same_succ_alloc (); 838 } 839 same_succ_delete (same); 840 bitmap_clear (deleted_bb_preds); 841 } 842 843 /* Prints cluster C to FILE. */ 844 845 static void 846 print_cluster (FILE *file, bb_cluster c) 847 { 848 if (c == NULL) 849 return; 850 bitmap_print (file, c->bbs, "bbs:", "\n"); 851 bitmap_print (file, c->preds, "preds:", "\n"); 852 } 853 854 /* Prints cluster C to stderr. */ 855 856 extern void debug_cluster (bb_cluster); 857 DEBUG_FUNCTION void 858 debug_cluster (bb_cluster c) 859 { 860 print_cluster (stderr, c); 861 } 862 863 /* Update C->rep_bb, given that BB is added to the cluster. */ 864 865 static void 866 update_rep_bb (bb_cluster c, basic_block bb) 867 { 868 /* Initial. */ 869 if (c->rep_bb == NULL) 870 { 871 c->rep_bb = bb; 872 return; 873 } 874 875 /* Current needs no deps, keep it. */ 876 if (BB_DEP_BB (c->rep_bb) == NULL) 877 return; 878 879 /* Bb needs no deps, change rep_bb. */ 880 if (BB_DEP_BB (bb) == NULL) 881 { 882 c->rep_bb = bb; 883 return; 884 } 885 886 /* Bb needs last deps earlier than current, change rep_bb. A potential 887 problem with this, is that the first deps might also be earlier, which 888 would mean we prefer longer lifetimes for the deps. To be able to check 889 for this, we would have to trace BB_FIRST_DEP_BB as well, besides 890 BB_DEP_BB, which is really BB_LAST_DEP_BB. 891 The benefit of choosing the bb with last deps earlier, is that it can 892 potentially be used as replacement for more bbs. */ 893 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb))) 894 c->rep_bb = bb; 895 } 896 897 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */ 898 899 static void 900 add_bb_to_cluster (bb_cluster c, basic_block bb) 901 { 902 edge e; 903 edge_iterator ei; 904 905 bitmap_set_bit (c->bbs, bb->index); 906 907 FOR_EACH_EDGE (e, ei, bb->preds) 908 bitmap_set_bit (c->preds, e->src->index); 909 910 update_rep_bb (c, bb); 911 } 912 913 /* Allocate and init new cluster. */ 914 915 static bb_cluster 916 new_cluster (void) 917 { 918 bb_cluster c; 919 c = XCNEW (struct bb_cluster_def); 920 c->bbs = BITMAP_ALLOC (NULL); 921 c->preds = BITMAP_ALLOC (NULL); 922 c->rep_bb = NULL; 923 return c; 924 } 925 926 /* Delete clusters. */ 927 928 static void 929 delete_cluster (bb_cluster c) 930 { 931 if (c == NULL) 932 return; 933 BITMAP_FREE (c->bbs); 934 BITMAP_FREE (c->preds); 935 XDELETE (c); 936 } 937 938 DEF_VEC_P (bb_cluster); 939 DEF_VEC_ALLOC_P (bb_cluster, heap); 940 941 /* Array that contains all clusters. */ 942 943 static VEC (bb_cluster, heap) *all_clusters; 944 945 /* Allocate all cluster vectors. */ 946 947 static void 948 alloc_cluster_vectors (void) 949 { 950 all_clusters = VEC_alloc (bb_cluster, heap, n_basic_blocks); 951 } 952 953 /* Reset all cluster vectors. */ 954 955 static void 956 reset_cluster_vectors (void) 957 { 958 unsigned int i; 959 basic_block bb; 960 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i) 961 delete_cluster (VEC_index (bb_cluster, all_clusters, i)); 962 VEC_truncate (bb_cluster, all_clusters, 0); 963 FOR_EACH_BB (bb) 964 BB_CLUSTER (bb) = NULL; 965 } 966 967 /* Delete all cluster vectors. */ 968 969 static void 970 delete_cluster_vectors (void) 971 { 972 unsigned int i; 973 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i) 974 delete_cluster (VEC_index (bb_cluster, all_clusters, i)); 975 VEC_free (bb_cluster, heap, all_clusters); 976 } 977 978 /* Merge cluster C2 into C1. */ 979 980 static void 981 merge_clusters (bb_cluster c1, bb_cluster c2) 982 { 983 bitmap_ior_into (c1->bbs, c2->bbs); 984 bitmap_ior_into (c1->preds, c2->preds); 985 } 986 987 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in 988 all_clusters, or merge c with existing cluster. */ 989 990 static void 991 set_cluster (basic_block bb1, basic_block bb2) 992 { 993 basic_block merge_bb, other_bb; 994 bb_cluster merge, old, c; 995 996 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL) 997 { 998 c = new_cluster (); 999 add_bb_to_cluster (c, bb1); 1000 add_bb_to_cluster (c, bb2); 1001 BB_CLUSTER (bb1) = c; 1002 BB_CLUSTER (bb2) = c; 1003 c->index = VEC_length (bb_cluster, all_clusters); 1004 VEC_safe_push (bb_cluster, heap, all_clusters, c); 1005 } 1006 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL) 1007 { 1008 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1; 1009 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2; 1010 merge = BB_CLUSTER (merge_bb); 1011 add_bb_to_cluster (merge, other_bb); 1012 BB_CLUSTER (other_bb) = merge; 1013 } 1014 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2)) 1015 { 1016 unsigned int i; 1017 bitmap_iterator bi; 1018 1019 old = BB_CLUSTER (bb2); 1020 merge = BB_CLUSTER (bb1); 1021 merge_clusters (merge, old); 1022 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi) 1023 BB_CLUSTER (BASIC_BLOCK (i)) = merge; 1024 VEC_replace (bb_cluster, all_clusters, old->index, NULL); 1025 update_rep_bb (merge, old->rep_bb); 1026 delete_cluster (old); 1027 } 1028 else 1029 gcc_unreachable (); 1030 } 1031 1032 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and 1033 gimple_bb (s2) are members of SAME_SUCC. */ 1034 1035 static bool 1036 gimple_equal_p (same_succ same_succ, gimple s1, gimple s2) 1037 { 1038 unsigned int i; 1039 tree lhs1, lhs2; 1040 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2); 1041 tree t1, t2; 1042 bool equal, inv_cond; 1043 enum tree_code code1, code2; 1044 1045 if (gimple_code (s1) != gimple_code (s2)) 1046 return false; 1047 1048 switch (gimple_code (s1)) 1049 { 1050 case GIMPLE_CALL: 1051 if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) 1052 return false; 1053 if (!gimple_call_same_target_p (s1, s2)) 1054 return false; 1055 1056 /* Eventually, we'll significantly complicate the CFG by adding 1057 back edges to properly model the effects of transaction restart. 1058 For the bulk of optimization this does not matter, but what we 1059 cannot recover from is tail merging blocks between two separate 1060 transactions. Avoid that by making commit not match. */ 1061 if (gimple_call_builtin_p (s1, BUILT_IN_TM_COMMIT)) 1062 return false; 1063 1064 equal = true; 1065 for (i = 0; i < gimple_call_num_args (s1); ++i) 1066 { 1067 t1 = gimple_call_arg (s1, i); 1068 t2 = gimple_call_arg (s2, i); 1069 if (operand_equal_p (t1, t2, 0)) 1070 continue; 1071 if (gvn_uses_equal (t1, t2)) 1072 continue; 1073 equal = false; 1074 break; 1075 } 1076 if (!equal) 1077 return false; 1078 1079 lhs1 = gimple_get_lhs (s1); 1080 lhs2 = gimple_get_lhs (s2); 1081 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE) 1082 return true; 1083 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE) 1084 return false; 1085 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME) 1086 return vn_valueize (lhs1) == vn_valueize (lhs2); 1087 return operand_equal_p (lhs1, lhs2, 0); 1088 1089 case GIMPLE_ASSIGN: 1090 lhs1 = gimple_get_lhs (s1); 1091 lhs2 = gimple_get_lhs (s2); 1092 return (TREE_CODE (lhs1) == SSA_NAME 1093 && TREE_CODE (lhs2) == SSA_NAME 1094 && vn_valueize (lhs1) == vn_valueize (lhs2)); 1095 1096 case GIMPLE_COND: 1097 t1 = gimple_cond_lhs (s1); 1098 t2 = gimple_cond_lhs (s2); 1099 if (!operand_equal_p (t1, t2, 0) 1100 && !gvn_uses_equal (t1, t2)) 1101 return false; 1102 1103 t1 = gimple_cond_rhs (s1); 1104 t2 = gimple_cond_rhs (s2); 1105 if (!operand_equal_p (t1, t2, 0) 1106 && !gvn_uses_equal (t1, t2)) 1107 return false; 1108 1109 code1 = gimple_expr_code (s1); 1110 code2 = gimple_expr_code (s2); 1111 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index) 1112 != bitmap_bit_p (same_succ->inverse, bb2->index)); 1113 if (inv_cond) 1114 { 1115 bool honor_nans 1116 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1)))); 1117 code2 = invert_tree_comparison (code2, honor_nans); 1118 } 1119 return code1 == code2; 1120 1121 default: 1122 return false; 1123 } 1124 } 1125 1126 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. 1127 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the 1128 processed statements. */ 1129 1130 static void 1131 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, 1132 bool *vuse_escaped) 1133 { 1134 gimple stmt; 1135 tree lvuse; 1136 1137 while (true) 1138 { 1139 if (gsi_end_p (*gsi)) 1140 return; 1141 stmt = gsi_stmt (*gsi); 1142 1143 lvuse = gimple_vuse (stmt); 1144 if (lvuse != NULL_TREE) 1145 { 1146 *vuse = lvuse; 1147 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) 1148 *vuse_escaped = true; 1149 } 1150 1151 if (!(is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) 1152 && !gimple_has_side_effects (stmt))) 1153 return; 1154 gsi_prev_nondebug (gsi); 1155 } 1156 } 1157 1158 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so, 1159 clusters them. */ 1160 1161 static void 1162 find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2) 1163 { 1164 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); 1165 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); 1166 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; 1167 bool vuse_escaped = false; 1168 1169 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1170 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1171 1172 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) 1173 { 1174 if (!gimple_equal_p (same_succ, gsi_stmt (gsi1), gsi_stmt (gsi2))) 1175 return; 1176 1177 gsi_prev_nondebug (&gsi1); 1178 gsi_prev_nondebug (&gsi2); 1179 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); 1180 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); 1181 } 1182 1183 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) 1184 return; 1185 1186 /* If the incoming vuses are not the same, and the vuse escaped into an 1187 SSA_OP_DEF, then merging the 2 blocks will change the value of the def, 1188 which potentially means the semantics of one of the blocks will be changed. 1189 TODO: make this check more precise. */ 1190 if (vuse_escaped && vuse1 != vuse2) 1191 return; 1192 1193 if (dump_file) 1194 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", 1195 bb1->index, bb2->index); 1196 1197 set_cluster (bb1, bb2); 1198 } 1199 1200 /* Returns whether for all phis in DEST the phi alternatives for E1 and 1201 E2 are equal. */ 1202 1203 static bool 1204 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2) 1205 { 1206 int n1 = e1->dest_idx, n2 = e2->dest_idx; 1207 gimple_stmt_iterator gsi; 1208 1209 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi)) 1210 { 1211 gimple phi = gsi_stmt (gsi); 1212 tree lhs = gimple_phi_result (phi); 1213 tree val1 = gimple_phi_arg_def (phi, n1); 1214 tree val2 = gimple_phi_arg_def (phi, n2); 1215 1216 if (!is_gimple_reg (lhs)) 1217 continue; 1218 1219 if (operand_equal_for_phi_arg_p (val1, val2)) 1220 continue; 1221 if (gvn_uses_equal (val1, val2)) 1222 continue; 1223 1224 return false; 1225 } 1226 1227 return true; 1228 } 1229 1230 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the 1231 phi alternatives for BB1 and BB2 are equal. */ 1232 1233 static bool 1234 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2) 1235 { 1236 unsigned int s; 1237 bitmap_iterator bs; 1238 edge e1, e2; 1239 basic_block succ; 1240 1241 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs) 1242 { 1243 succ = BASIC_BLOCK (s); 1244 e1 = find_edge (bb1, succ); 1245 e2 = find_edge (bb2, succ); 1246 if (e1->flags & EDGE_COMPLEX 1247 || e2->flags & EDGE_COMPLEX) 1248 return false; 1249 1250 /* For all phis in bb, the phi alternatives for e1 and e2 need to have 1251 the same value. */ 1252 if (!same_phi_alternatives_1 (succ, e1, e2)) 1253 return false; 1254 } 1255 1256 return true; 1257 } 1258 1259 /* Return true if BB has non-vop phis. */ 1260 1261 static bool 1262 bb_has_non_vop_phi (basic_block bb) 1263 { 1264 gimple_seq phis = phi_nodes (bb); 1265 gimple phi; 1266 1267 if (phis == NULL) 1268 return false; 1269 1270 if (!gimple_seq_singleton_p (phis)) 1271 return true; 1272 1273 phi = gimple_seq_first_stmt (phis); 1274 return is_gimple_reg (gimple_phi_result (phi)); 1275 } 1276 1277 /* Returns true if redirecting the incoming edges of FROM to TO maintains the 1278 invariant that uses in FROM are dominates by their defs. */ 1279 1280 static bool 1281 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to) 1282 { 1283 basic_block cd, dep_bb = BB_DEP_BB (to); 1284 edge_iterator ei; 1285 edge e; 1286 bitmap from_preds = BITMAP_ALLOC (NULL); 1287 1288 if (dep_bb == NULL) 1289 return true; 1290 1291 FOR_EACH_EDGE (e, ei, from->preds) 1292 bitmap_set_bit (from_preds, e->src->index); 1293 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds); 1294 BITMAP_FREE (from_preds); 1295 1296 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd); 1297 } 1298 1299 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its 1300 replacement bb) and vice versa maintains the invariant that uses in the 1301 replacement are dominates by their defs. */ 1302 1303 static bool 1304 deps_ok_for_redirect (basic_block bb1, basic_block bb2) 1305 { 1306 if (BB_CLUSTER (bb1) != NULL) 1307 bb1 = BB_CLUSTER (bb1)->rep_bb; 1308 1309 if (BB_CLUSTER (bb2) != NULL) 1310 bb2 = BB_CLUSTER (bb2)->rep_bb; 1311 1312 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2) 1313 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1)); 1314 } 1315 1316 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */ 1317 1318 static void 1319 find_clusters_1 (same_succ same_succ) 1320 { 1321 basic_block bb1, bb2; 1322 unsigned int i, j; 1323 bitmap_iterator bi, bj; 1324 int nr_comparisons; 1325 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS); 1326 1327 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi) 1328 { 1329 bb1 = BASIC_BLOCK (i); 1330 1331 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding 1332 phi-nodes in bb1 and bb2, with the same alternatives for the same 1333 preds. */ 1334 if (bb_has_non_vop_phi (bb1)) 1335 continue; 1336 1337 nr_comparisons = 0; 1338 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj) 1339 { 1340 bb2 = BASIC_BLOCK (j); 1341 1342 if (bb_has_non_vop_phi (bb2)) 1343 continue; 1344 1345 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2)) 1346 continue; 1347 1348 /* Limit quadratic behaviour. */ 1349 nr_comparisons++; 1350 if (nr_comparisons > max_comparisons) 1351 break; 1352 1353 /* This is a conservative dependency check. We could test more 1354 precise for allowed replacement direction. */ 1355 if (!deps_ok_for_redirect (bb1, bb2)) 1356 continue; 1357 1358 if (!(same_phi_alternatives (same_succ, bb1, bb2))) 1359 continue; 1360 1361 find_duplicate (same_succ, bb1, bb2); 1362 } 1363 } 1364 } 1365 1366 /* Find clusters of bbs which can be merged. */ 1367 1368 static void 1369 find_clusters (void) 1370 { 1371 same_succ same; 1372 1373 while (!VEC_empty (same_succ, worklist)) 1374 { 1375 same = VEC_pop (same_succ, worklist); 1376 same->in_worklist = false; 1377 if (dump_file && (dump_flags & TDF_DETAILS)) 1378 { 1379 fprintf (dump_file, "processing worklist entry\n"); 1380 same_succ_print (dump_file, same); 1381 } 1382 find_clusters_1 (same); 1383 } 1384 } 1385 1386 /* Returns the vop phi of BB, if any. */ 1387 1388 static gimple 1389 vop_phi (basic_block bb) 1390 { 1391 gimple stmt; 1392 gimple_stmt_iterator gsi; 1393 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1394 { 1395 stmt = gsi_stmt (gsi); 1396 if (is_gimple_reg (gimple_phi_result (stmt))) 1397 continue; 1398 return stmt; 1399 } 1400 return NULL; 1401 } 1402 1403 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */ 1404 1405 static void 1406 replace_block_by (basic_block bb1, basic_block bb2) 1407 { 1408 edge pred_edge; 1409 unsigned int i; 1410 gimple bb2_phi; 1411 1412 bb2_phi = vop_phi (bb2); 1413 1414 /* Mark the basic block as deleted. */ 1415 mark_basic_block_deleted (bb1); 1416 1417 /* Redirect the incoming edges of bb1 to bb2. */ 1418 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i) 1419 { 1420 pred_edge = EDGE_PRED (bb1, i - 1); 1421 pred_edge = redirect_edge_and_branch (pred_edge, bb2); 1422 gcc_assert (pred_edge != NULL); 1423 1424 if (bb2_phi == NULL) 1425 continue; 1426 1427 /* The phi might have run out of capacity when the redirect added an 1428 argument, which means it could have been replaced. Refresh it. */ 1429 bb2_phi = vop_phi (bb2); 1430 1431 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)), 1432 pred_edge, UNKNOWN_LOCATION); 1433 } 1434 1435 bb2->frequency += bb1->frequency; 1436 if (bb2->frequency > BB_FREQ_MAX) 1437 bb2->frequency = BB_FREQ_MAX; 1438 bb1->frequency = 0; 1439 1440 /* Do updates that use bb1, before deleting bb1. */ 1441 release_last_vdef (bb1); 1442 same_succ_flush_bb (bb1); 1443 1444 delete_basic_block (bb1); 1445 } 1446 1447 /* Bbs for which update_debug_stmt need to be called. */ 1448 1449 static bitmap update_bbs; 1450 1451 /* For each cluster in all_clusters, merge all cluster->bbs. Returns 1452 number of bbs removed. */ 1453 1454 static int 1455 apply_clusters (void) 1456 { 1457 basic_block bb1, bb2; 1458 bb_cluster c; 1459 unsigned int i, j; 1460 bitmap_iterator bj; 1461 int nr_bbs_removed = 0; 1462 1463 for (i = 0; i < VEC_length (bb_cluster, all_clusters); ++i) 1464 { 1465 c = VEC_index (bb_cluster, all_clusters, i); 1466 if (c == NULL) 1467 continue; 1468 1469 bb2 = c->rep_bb; 1470 bitmap_set_bit (update_bbs, bb2->index); 1471 1472 bitmap_clear_bit (c->bbs, bb2->index); 1473 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj) 1474 { 1475 bb1 = BASIC_BLOCK (j); 1476 bitmap_clear_bit (update_bbs, bb1->index); 1477 1478 replace_block_by (bb1, bb2); 1479 nr_bbs_removed++; 1480 } 1481 } 1482 1483 return nr_bbs_removed; 1484 } 1485 1486 /* Resets debug statement STMT if it has uses that are not dominated by their 1487 defs. */ 1488 1489 static void 1490 update_debug_stmt (gimple stmt) 1491 { 1492 use_operand_p use_p; 1493 ssa_op_iter oi; 1494 basic_block bbdef, bbuse; 1495 gimple def_stmt; 1496 tree name; 1497 1498 if (!gimple_debug_bind_p (stmt)) 1499 return; 1500 1501 bbuse = gimple_bb (stmt); 1502 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE) 1503 { 1504 name = USE_FROM_PTR (use_p); 1505 gcc_assert (TREE_CODE (name) == SSA_NAME); 1506 1507 def_stmt = SSA_NAME_DEF_STMT (name); 1508 gcc_assert (def_stmt != NULL); 1509 1510 bbdef = gimple_bb (def_stmt); 1511 if (bbdef == NULL || bbuse == bbdef 1512 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)) 1513 continue; 1514 1515 gimple_debug_bind_reset_value (stmt); 1516 update_stmt (stmt); 1517 } 1518 } 1519 1520 /* Resets all debug statements that have uses that are not 1521 dominated by their defs. */ 1522 1523 static void 1524 update_debug_stmts (void) 1525 { 1526 basic_block bb; 1527 bitmap_iterator bi; 1528 unsigned int i; 1529 1530 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi) 1531 { 1532 gimple stmt; 1533 gimple_stmt_iterator gsi; 1534 1535 bb = BASIC_BLOCK (i); 1536 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1537 { 1538 stmt = gsi_stmt (gsi); 1539 if (!is_gimple_debug (stmt)) 1540 continue; 1541 update_debug_stmt (stmt); 1542 } 1543 } 1544 } 1545 1546 /* Runs tail merge optimization. */ 1547 1548 unsigned int 1549 tail_merge_optimize (unsigned int todo) 1550 { 1551 int nr_bbs_removed_total = 0; 1552 int nr_bbs_removed; 1553 bool loop_entered = false; 1554 int iteration_nr = 0; 1555 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS); 1556 1557 if (!flag_tree_tail_merge || max_iterations == 0) 1558 return 0; 1559 1560 timevar_push (TV_TREE_TAIL_MERGE); 1561 1562 calculate_dominance_info (CDI_DOMINATORS); 1563 init_worklist (); 1564 1565 while (!VEC_empty (same_succ, worklist)) 1566 { 1567 if (!loop_entered) 1568 { 1569 loop_entered = true; 1570 alloc_cluster_vectors (); 1571 update_bbs = BITMAP_ALLOC (NULL); 1572 } 1573 else 1574 reset_cluster_vectors (); 1575 1576 iteration_nr++; 1577 if (dump_file && (dump_flags & TDF_DETAILS)) 1578 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr); 1579 1580 find_clusters (); 1581 gcc_assert (VEC_empty (same_succ, worklist)); 1582 if (VEC_empty (bb_cluster, all_clusters)) 1583 break; 1584 1585 nr_bbs_removed = apply_clusters (); 1586 nr_bbs_removed_total += nr_bbs_removed; 1587 if (nr_bbs_removed == 0) 1588 break; 1589 1590 free_dominance_info (CDI_DOMINATORS); 1591 1592 if (iteration_nr == max_iterations) 1593 break; 1594 1595 calculate_dominance_info (CDI_DOMINATORS); 1596 update_worklist (); 1597 } 1598 1599 if (dump_file && (dump_flags & TDF_DETAILS)) 1600 fprintf (dump_file, "htab collision / search: %f\n", 1601 htab_collisions (same_succ_htab)); 1602 1603 if (nr_bbs_removed_total > 0) 1604 { 1605 if (MAY_HAVE_DEBUG_STMTS) 1606 { 1607 calculate_dominance_info (CDI_DOMINATORS); 1608 update_debug_stmts (); 1609 } 1610 1611 if (dump_file && (dump_flags & TDF_DETAILS)) 1612 { 1613 fprintf (dump_file, "Before TODOs.\n"); 1614 dump_function_to_file (current_function_decl, dump_file, dump_flags); 1615 } 1616 1617 todo |= (TODO_verify_ssa | TODO_verify_stmts | TODO_verify_flow 1618 | TODO_dump_func); 1619 mark_sym_for_renaming (gimple_vop (cfun)); 1620 } 1621 1622 delete_worklist (); 1623 if (loop_entered) 1624 { 1625 delete_cluster_vectors (); 1626 BITMAP_FREE (update_bbs); 1627 } 1628 1629 timevar_pop (TV_TREE_TAIL_MERGE); 1630 1631 return todo; 1632 } 1633