1 /* Loop manipulation code for GNU compiler. 2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011 3 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "rtl.h" 26 #include "hard-reg-set.h" 27 #include "obstack.h" 28 #include "basic-block.h" 29 #include "cfgloop.h" 30 #include "cfglayout.h" 31 #include "cfghooks.h" 32 #include "output.h" 33 #include "tree-flow.h" 34 35 static void copy_loops_to (struct loop **, int, 36 struct loop *); 37 static void loop_redirect_edge (edge, basic_block); 38 static void remove_bbs (basic_block *, int); 39 static bool rpe_enum_p (const_basic_block, const void *); 40 static int find_path (edge, basic_block **); 41 static void fix_loop_placements (struct loop *, bool *); 42 static bool fix_bb_placement (basic_block); 43 static void fix_bb_placements (basic_block, bool *); 44 static void unloop (struct loop *, bool *); 45 46 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) 47 48 /* Checks whether basic block BB is dominated by DATA. */ 49 static bool 50 rpe_enum_p (const_basic_block bb, const void *data) 51 { 52 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data); 53 } 54 55 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */ 56 57 static void 58 remove_bbs (basic_block *bbs, int nbbs) 59 { 60 int i; 61 62 for (i = 0; i < nbbs; i++) 63 delete_basic_block (bbs[i]); 64 } 65 66 /* Find path -- i.e. the basic blocks dominated by edge E and put them 67 into array BBS, that will be allocated large enough to contain them. 68 E->dest must have exactly one predecessor for this to work (it is 69 easy to achieve and we do not put it here because we do not want to 70 alter anything by this function). The number of basic blocks in the 71 path is returned. */ 72 static int 73 find_path (edge e, basic_block **bbs) 74 { 75 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1); 76 77 /* Find bbs in the path. */ 78 *bbs = XCNEWVEC (basic_block, n_basic_blocks); 79 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs, 80 n_basic_blocks, e->dest); 81 } 82 83 /* Fix placement of basic block BB inside loop hierarchy -- 84 Let L be a loop to that BB belongs. Then every successor of BB must either 85 1) belong to some superloop of loop L, or 86 2) be a header of loop K such that K->outer is superloop of L 87 Returns true if we had to move BB into other loop to enforce this condition, 88 false if the placement of BB was already correct (provided that placements 89 of its successors are correct). */ 90 static bool 91 fix_bb_placement (basic_block bb) 92 { 93 edge e; 94 edge_iterator ei; 95 struct loop *loop = current_loops->tree_root, *act; 96 97 FOR_EACH_EDGE (e, ei, bb->succs) 98 { 99 if (e->dest == EXIT_BLOCK_PTR) 100 continue; 101 102 act = e->dest->loop_father; 103 if (act->header == e->dest) 104 act = loop_outer (act); 105 106 if (flow_loop_nested_p (loop, act)) 107 loop = act; 108 } 109 110 if (loop == bb->loop_father) 111 return false; 112 113 remove_bb_from_loops (bb); 114 add_bb_to_loop (bb, loop); 115 116 return true; 117 } 118 119 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop 120 of LOOP to that leads at least one exit edge of LOOP, and set it 121 as the immediate superloop of LOOP. Return true if the immediate superloop 122 of LOOP changed. */ 123 124 static bool 125 fix_loop_placement (struct loop *loop) 126 { 127 unsigned i; 128 edge e; 129 VEC (edge, heap) *exits = get_loop_exit_edges (loop); 130 struct loop *father = current_loops->tree_root, *act; 131 bool ret = false; 132 133 FOR_EACH_VEC_ELT (edge, exits, i, e) 134 { 135 act = find_common_loop (loop, e->dest->loop_father); 136 if (flow_loop_nested_p (father, act)) 137 father = act; 138 } 139 140 if (father != loop_outer (loop)) 141 { 142 for (act = loop_outer (loop); act != father; act = loop_outer (act)) 143 act->num_nodes -= loop->num_nodes; 144 flow_loop_tree_node_remove (loop); 145 flow_loop_tree_node_add (father, loop); 146 147 /* The exit edges of LOOP no longer exits its original immediate 148 superloops; remove them from the appropriate exit lists. */ 149 FOR_EACH_VEC_ELT (edge, exits, i, e) 150 rescan_loop_exit (e, false, false); 151 152 ret = true; 153 } 154 155 VEC_free (edge, heap, exits); 156 return ret; 157 } 158 159 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e. 160 enforce condition condition stated in description of fix_bb_placement. We 161 start from basic block FROM that had some of its successors removed, so that 162 his placement no longer has to be correct, and iteratively fix placement of 163 its predecessors that may change if placement of FROM changed. Also fix 164 placement of subloops of FROM->loop_father, that might also be altered due 165 to this change; the condition for them is similar, except that instead of 166 successors we consider edges coming out of the loops. 167 168 If the changes may invalidate the information about irreducible regions, 169 IRRED_INVALIDATED is set to true. */ 170 171 static void 172 fix_bb_placements (basic_block from, 173 bool *irred_invalidated) 174 { 175 sbitmap in_queue; 176 basic_block *queue, *qtop, *qbeg, *qend; 177 struct loop *base_loop, *target_loop; 178 edge e; 179 180 /* We pass through blocks back-reachable from FROM, testing whether some 181 of their successors moved to outer loop. It may be necessary to 182 iterate several times, but it is finite, as we stop unless we move 183 the basic block up the loop structure. The whole story is a bit 184 more complicated due to presence of subloops, those are moved using 185 fix_loop_placement. */ 186 187 base_loop = from->loop_father; 188 /* If we are already in the outermost loop, the basic blocks cannot be moved 189 outside of it. If FROM is the header of the base loop, it cannot be moved 190 outside of it, either. In both cases, we can end now. */ 191 if (base_loop == current_loops->tree_root 192 || from == base_loop->header) 193 return; 194 195 in_queue = sbitmap_alloc (last_basic_block); 196 sbitmap_zero (in_queue); 197 SET_BIT (in_queue, from->index); 198 /* Prevent us from going out of the base_loop. */ 199 SET_BIT (in_queue, base_loop->header->index); 200 201 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1); 202 qtop = queue + base_loop->num_nodes + 1; 203 qbeg = queue; 204 qend = queue + 1; 205 *qbeg = from; 206 207 while (qbeg != qend) 208 { 209 edge_iterator ei; 210 from = *qbeg; 211 qbeg++; 212 if (qbeg == qtop) 213 qbeg = queue; 214 RESET_BIT (in_queue, from->index); 215 216 if (from->loop_father->header == from) 217 { 218 /* Subloop header, maybe move the loop upward. */ 219 if (!fix_loop_placement (from->loop_father)) 220 continue; 221 target_loop = loop_outer (from->loop_father); 222 } 223 else 224 { 225 /* Ordinary basic block. */ 226 if (!fix_bb_placement (from)) 227 continue; 228 target_loop = from->loop_father; 229 } 230 231 FOR_EACH_EDGE (e, ei, from->succs) 232 { 233 if (e->flags & EDGE_IRREDUCIBLE_LOOP) 234 *irred_invalidated = true; 235 } 236 237 /* Something has changed, insert predecessors into queue. */ 238 FOR_EACH_EDGE (e, ei, from->preds) 239 { 240 basic_block pred = e->src; 241 struct loop *nca; 242 243 if (e->flags & EDGE_IRREDUCIBLE_LOOP) 244 *irred_invalidated = true; 245 246 if (TEST_BIT (in_queue, pred->index)) 247 continue; 248 249 /* If it is subloop, then it either was not moved, or 250 the path up the loop tree from base_loop do not contain 251 it. */ 252 nca = find_common_loop (pred->loop_father, base_loop); 253 if (pred->loop_father != base_loop 254 && (nca == base_loop 255 || nca != pred->loop_father)) 256 pred = pred->loop_father->header; 257 else if (!flow_loop_nested_p (target_loop, pred->loop_father)) 258 { 259 /* If PRED is already higher in the loop hierarchy than the 260 TARGET_LOOP to that we moved FROM, the change of the position 261 of FROM does not affect the position of PRED, so there is no 262 point in processing it. */ 263 continue; 264 } 265 266 if (TEST_BIT (in_queue, pred->index)) 267 continue; 268 269 /* Schedule the basic block. */ 270 *qend = pred; 271 qend++; 272 if (qend == qtop) 273 qend = queue; 274 SET_BIT (in_queue, pred->index); 275 } 276 } 277 free (in_queue); 278 free (queue); 279 } 280 281 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E 282 and update loop structures and dominators. Return true if we were able 283 to remove the path, false otherwise (and nothing is affected then). */ 284 bool 285 remove_path (edge e) 286 { 287 edge ae; 288 basic_block *rem_bbs, *bord_bbs, from, bb; 289 VEC (basic_block, heap) *dom_bbs; 290 int i, nrem, n_bord_bbs; 291 sbitmap seen; 292 bool irred_invalidated = false; 293 edge_iterator ei; 294 struct loop *l, *f; 295 296 if (!can_remove_branch_p (e)) 297 return false; 298 299 /* Keep track of whether we need to update information about irreducible 300 regions. This is the case if the removed area is a part of the 301 irreducible region, or if the set of basic blocks that belong to a loop 302 that is inside an irreducible region is changed, or if such a loop is 303 removed. */ 304 if (e->flags & EDGE_IRREDUCIBLE_LOOP) 305 irred_invalidated = true; 306 307 /* We need to check whether basic blocks are dominated by the edge 308 e, but we only have basic block dominators. This is easy to 309 fix -- when e->dest has exactly one predecessor, this corresponds 310 to blocks dominated by e->dest, if not, split the edge. */ 311 if (!single_pred_p (e->dest)) 312 e = single_pred_edge (split_edge (e)); 313 314 /* It may happen that by removing path we remove one or more loops 315 we belong to. In this case first unloop the loops, then proceed 316 normally. We may assume that e->dest is not a header of any loop, 317 as it now has exactly one predecessor. */ 318 for (l = e->src->loop_father; loop_outer (l); l = f) 319 { 320 f = loop_outer (l); 321 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest)) 322 unloop (l, &irred_invalidated); 323 } 324 325 /* Identify the path. */ 326 nrem = find_path (e, &rem_bbs); 327 328 n_bord_bbs = 0; 329 bord_bbs = XCNEWVEC (basic_block, n_basic_blocks); 330 seen = sbitmap_alloc (last_basic_block); 331 sbitmap_zero (seen); 332 333 /* Find "border" hexes -- i.e. those with predecessor in removed path. */ 334 for (i = 0; i < nrem; i++) 335 SET_BIT (seen, rem_bbs[i]->index); 336 if (!irred_invalidated) 337 FOR_EACH_EDGE (ae, ei, e->src->succs) 338 if (ae != e && ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index) 339 && ae->flags & EDGE_IRREDUCIBLE_LOOP) 340 irred_invalidated = true; 341 for (i = 0; i < nrem; i++) 342 { 343 bb = rem_bbs[i]; 344 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs) 345 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index)) 346 { 347 SET_BIT (seen, ae->dest->index); 348 bord_bbs[n_bord_bbs++] = ae->dest; 349 350 if (ae->flags & EDGE_IRREDUCIBLE_LOOP) 351 irred_invalidated = true; 352 } 353 } 354 355 /* Remove the path. */ 356 from = e->src; 357 remove_branch (e); 358 dom_bbs = NULL; 359 360 /* Cancel loops contained in the path. */ 361 for (i = 0; i < nrem; i++) 362 if (rem_bbs[i]->loop_father->header == rem_bbs[i]) 363 cancel_loop_tree (rem_bbs[i]->loop_father); 364 365 remove_bbs (rem_bbs, nrem); 366 free (rem_bbs); 367 368 /* Find blocks whose dominators may be affected. */ 369 sbitmap_zero (seen); 370 for (i = 0; i < n_bord_bbs; i++) 371 { 372 basic_block ldom; 373 374 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]); 375 if (TEST_BIT (seen, bb->index)) 376 continue; 377 SET_BIT (seen, bb->index); 378 379 for (ldom = first_dom_son (CDI_DOMINATORS, bb); 380 ldom; 381 ldom = next_dom_son (CDI_DOMINATORS, ldom)) 382 if (!dominated_by_p (CDI_DOMINATORS, from, ldom)) 383 VEC_safe_push (basic_block, heap, dom_bbs, ldom); 384 } 385 386 free (seen); 387 388 /* Recount dominators. */ 389 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true); 390 VEC_free (basic_block, heap, dom_bbs); 391 free (bord_bbs); 392 393 /* Fix placements of basic blocks inside loops and the placement of 394 loops in the loop tree. */ 395 fix_bb_placements (from, &irred_invalidated); 396 fix_loop_placements (from->loop_father, &irred_invalidated); 397 398 if (irred_invalidated 399 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) 400 mark_irreducible_loops (); 401 402 return true; 403 } 404 405 /* Creates place for a new LOOP in loops structure. */ 406 407 static void 408 place_new_loop (struct loop *loop) 409 { 410 loop->num = number_of_loops (); 411 VEC_safe_push (loop_p, gc, current_loops->larray, loop); 412 } 413 414 /* Given LOOP structure with filled header and latch, find the body of the 415 corresponding loop and add it to loops tree. Insert the LOOP as a son of 416 outer. */ 417 418 void 419 add_loop (struct loop *loop, struct loop *outer) 420 { 421 basic_block *bbs; 422 int i, n; 423 struct loop *subloop; 424 edge e; 425 edge_iterator ei; 426 427 /* Add it to loop structure. */ 428 place_new_loop (loop); 429 flow_loop_tree_node_add (outer, loop); 430 431 /* Find its nodes. */ 432 bbs = XNEWVEC (basic_block, n_basic_blocks); 433 n = get_loop_body_with_size (loop, bbs, n_basic_blocks); 434 435 for (i = 0; i < n; i++) 436 { 437 if (bbs[i]->loop_father == outer) 438 { 439 remove_bb_from_loops (bbs[i]); 440 add_bb_to_loop (bbs[i], loop); 441 continue; 442 } 443 444 loop->num_nodes++; 445 446 /* If we find a direct subloop of OUTER, move it to LOOP. */ 447 subloop = bbs[i]->loop_father; 448 if (loop_outer (subloop) == outer 449 && subloop->header == bbs[i]) 450 { 451 flow_loop_tree_node_remove (subloop); 452 flow_loop_tree_node_add (loop, subloop); 453 } 454 } 455 456 /* Update the information about loop exit edges. */ 457 for (i = 0; i < n; i++) 458 { 459 FOR_EACH_EDGE (e, ei, bbs[i]->succs) 460 { 461 rescan_loop_exit (e, false, false); 462 } 463 } 464 465 free (bbs); 466 } 467 468 /* Multiply all frequencies in LOOP by NUM/DEN. */ 469 void 470 scale_loop_frequencies (struct loop *loop, int num, int den) 471 { 472 basic_block *bbs; 473 474 bbs = get_loop_body (loop); 475 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den); 476 free (bbs); 477 } 478 479 /* Recompute dominance information for basic blocks outside LOOP. */ 480 481 static void 482 update_dominators_in_loop (struct loop *loop) 483 { 484 VEC (basic_block, heap) *dom_bbs = NULL; 485 sbitmap seen; 486 basic_block *body; 487 unsigned i; 488 489 seen = sbitmap_alloc (last_basic_block); 490 sbitmap_zero (seen); 491 body = get_loop_body (loop); 492 493 for (i = 0; i < loop->num_nodes; i++) 494 SET_BIT (seen, body[i]->index); 495 496 for (i = 0; i < loop->num_nodes; i++) 497 { 498 basic_block ldom; 499 500 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]); 501 ldom; 502 ldom = next_dom_son (CDI_DOMINATORS, ldom)) 503 if (!TEST_BIT (seen, ldom->index)) 504 { 505 SET_BIT (seen, ldom->index); 506 VEC_safe_push (basic_block, heap, dom_bbs, ldom); 507 } 508 } 509 510 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); 511 free (body); 512 free (seen); 513 VEC_free (basic_block, heap, dom_bbs); 514 } 515 516 /* Creates an if region as shown above. CONDITION is used to create 517 the test for the if. 518 519 | 520 | ------------- ------------- 521 | | pred_bb | | pred_bb | 522 | ------------- ------------- 523 | | | 524 | | | ENTRY_EDGE 525 | | ENTRY_EDGE V 526 | | ====> ------------- 527 | | | cond_bb | 528 | | | CONDITION | 529 | | ------------- 530 | V / \ 531 | ------------- e_false / \ e_true 532 | | succ_bb | V V 533 | ------------- ----------- ----------- 534 | | false_bb | | true_bb | 535 | ----------- ----------- 536 | \ / 537 | \ / 538 | V V 539 | ------------- 540 | | join_bb | 541 | ------------- 542 | | exit_edge (result) 543 | V 544 | ----------- 545 | | succ_bb | 546 | ----------- 547 | 548 */ 549 550 edge 551 create_empty_if_region_on_edge (edge entry_edge, tree condition) 552 { 553 554 basic_block cond_bb, true_bb, false_bb, join_bb; 555 edge e_true, e_false, exit_edge; 556 gimple cond_stmt; 557 tree simple_cond; 558 gimple_stmt_iterator gsi; 559 560 cond_bb = split_edge (entry_edge); 561 562 /* Insert condition in cond_bb. */ 563 gsi = gsi_last_bb (cond_bb); 564 simple_cond = 565 force_gimple_operand_gsi (&gsi, condition, true, NULL, 566 false, GSI_NEW_STMT); 567 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE); 568 gsi = gsi_last_bb (cond_bb); 569 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 570 571 join_bb = split_edge (single_succ_edge (cond_bb)); 572 573 e_true = single_succ_edge (cond_bb); 574 true_bb = split_edge (e_true); 575 576 e_false = make_edge (cond_bb, join_bb, 0); 577 false_bb = split_edge (e_false); 578 579 e_true->flags &= ~EDGE_FALLTHRU; 580 e_true->flags |= EDGE_TRUE_VALUE; 581 e_false->flags &= ~EDGE_FALLTHRU; 582 e_false->flags |= EDGE_FALSE_VALUE; 583 584 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src); 585 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb); 586 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb); 587 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); 588 589 exit_edge = single_succ_edge (join_bb); 590 591 if (single_pred_p (exit_edge->dest)) 592 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb); 593 594 return exit_edge; 595 } 596 597 /* create_empty_loop_on_edge 598 | 599 | - pred_bb - ------ pred_bb ------ 600 | | | | iv0 = initial_value | 601 | -----|----- ---------|----------- 602 | | ______ | entry_edge 603 | | entry_edge / | | 604 | | ====> | -V---V- loop_header ------------- 605 | V | | iv_before = phi (iv0, iv_after) | 606 | - succ_bb - | ---|----------------------------- 607 | | | | | 608 | ----------- | ---V--- loop_body --------------- 609 | | | iv_after = iv_before + stride | 610 | | | if (iv_before < upper_bound) | 611 | | ---|--------------\-------------- 612 | | | \ exit_e 613 | | V \ 614 | | - loop_latch - V- succ_bb - 615 | | | | | | 616 | | /------------- ----------- 617 | \ ___ / 618 619 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME 620 that is used before the increment of IV. IV_BEFORE should be used for 621 adding code to the body that uses the IV. OUTER is the outer loop in 622 which the new loop should be inserted. 623 624 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and 625 inserted on the loop entry edge. This implies that this function 626 should be used only when the UPPER_BOUND expression is a loop 627 invariant. */ 628 629 struct loop * 630 create_empty_loop_on_edge (edge entry_edge, 631 tree initial_value, 632 tree stride, tree upper_bound, 633 tree iv, 634 tree *iv_before, 635 tree *iv_after, 636 struct loop *outer) 637 { 638 basic_block loop_header, loop_latch, succ_bb, pred_bb; 639 struct loop *loop; 640 gimple_stmt_iterator gsi; 641 gimple_seq stmts; 642 gimple cond_expr; 643 tree exit_test; 644 edge exit_e; 645 int prob; 646 647 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv); 648 649 /* Create header, latch and wire up the loop. */ 650 pred_bb = entry_edge->src; 651 loop_header = split_edge (entry_edge); 652 loop_latch = split_edge (single_succ_edge (loop_header)); 653 succ_bb = single_succ (loop_latch); 654 make_edge (loop_header, succ_bb, 0); 655 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header); 656 657 /* Set immediate dominator information. */ 658 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb); 659 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header); 660 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header); 661 662 /* Initialize a loop structure and put it in a loop hierarchy. */ 663 loop = alloc_loop (); 664 loop->header = loop_header; 665 loop->latch = loop_latch; 666 add_loop (loop, outer); 667 668 /* TODO: Fix frequencies and counts. */ 669 prob = REG_BR_PROB_BASE / 2; 670 671 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE); 672 673 /* Update dominators. */ 674 update_dominators_in_loop (loop); 675 676 /* Modify edge flags. */ 677 exit_e = single_exit (loop); 678 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE; 679 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE; 680 681 /* Construct IV code in loop. */ 682 initial_value = force_gimple_operand (initial_value, &stmts, true, iv); 683 if (stmts) 684 { 685 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); 686 gsi_commit_edge_inserts (); 687 } 688 689 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL); 690 if (stmts) 691 { 692 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); 693 gsi_commit_edge_inserts (); 694 } 695 696 gsi = gsi_last_bb (loop_header); 697 create_iv (initial_value, stride, iv, loop, &gsi, false, 698 iv_before, iv_after); 699 700 /* Insert loop exit condition. */ 701 cond_expr = gimple_build_cond 702 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE); 703 704 exit_test = gimple_cond_lhs (cond_expr); 705 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL, 706 false, GSI_NEW_STMT); 707 gimple_cond_set_lhs (cond_expr, exit_test); 708 gsi = gsi_last_bb (exit_e->src); 709 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT); 710 711 split_block_after_labels (loop_header); 712 713 return loop; 714 } 715 716 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting 717 latch to header and update loop tree and dominators 718 accordingly. Everything between them plus LATCH_EDGE destination must 719 be dominated by HEADER_EDGE destination, and back-reachable from 720 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB, 721 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and 722 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE. 723 Returns the newly created loop. Frequencies and counts in the new loop 724 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */ 725 726 struct loop * 727 loopify (edge latch_edge, edge header_edge, 728 basic_block switch_bb, edge true_edge, edge false_edge, 729 bool redirect_all_edges, unsigned true_scale, unsigned false_scale) 730 { 731 basic_block succ_bb = latch_edge->dest; 732 basic_block pred_bb = header_edge->src; 733 struct loop *loop = alloc_loop (); 734 struct loop *outer = loop_outer (succ_bb->loop_father); 735 int freq; 736 gcov_type cnt; 737 edge e; 738 edge_iterator ei; 739 740 loop->header = header_edge->dest; 741 loop->latch = latch_edge->src; 742 743 freq = EDGE_FREQUENCY (header_edge); 744 cnt = header_edge->count; 745 746 /* Redirect edges. */ 747 loop_redirect_edge (latch_edge, loop->header); 748 loop_redirect_edge (true_edge, succ_bb); 749 750 /* During loop versioning, one of the switch_bb edge is already properly 751 set. Do not redirect it again unless redirect_all_edges is true. */ 752 if (redirect_all_edges) 753 { 754 loop_redirect_edge (header_edge, switch_bb); 755 loop_redirect_edge (false_edge, loop->header); 756 757 /* Update dominators. */ 758 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb); 759 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb); 760 } 761 762 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb); 763 764 /* Compute new loop. */ 765 add_loop (loop, outer); 766 767 /* Add switch_bb to appropriate loop. */ 768 if (switch_bb->loop_father) 769 remove_bb_from_loops (switch_bb); 770 add_bb_to_loop (switch_bb, outer); 771 772 /* Fix frequencies. */ 773 if (redirect_all_edges) 774 { 775 switch_bb->frequency = freq; 776 switch_bb->count = cnt; 777 FOR_EACH_EDGE (e, ei, switch_bb->succs) 778 { 779 e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE; 780 } 781 } 782 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE); 783 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE); 784 update_dominators_in_loop (loop); 785 786 return loop; 787 } 788 789 /* Remove the latch edge of a LOOP and update loops to indicate that 790 the LOOP was removed. After this function, original loop latch will 791 have no successor, which caller is expected to fix somehow. 792 793 If this may cause the information about irreducible regions to become 794 invalid, IRRED_INVALIDATED is set to true. */ 795 796 static void 797 unloop (struct loop *loop, bool *irred_invalidated) 798 { 799 basic_block *body; 800 struct loop *ploop; 801 unsigned i, n; 802 basic_block latch = loop->latch; 803 bool dummy = false; 804 805 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) 806 *irred_invalidated = true; 807 808 /* This is relatively straightforward. The dominators are unchanged, as 809 loop header dominates loop latch, so the only thing we have to care of 810 is the placement of loops and basic blocks inside the loop tree. We 811 move them all to the loop->outer, and then let fix_bb_placements do 812 its work. */ 813 814 body = get_loop_body (loop); 815 n = loop->num_nodes; 816 for (i = 0; i < n; i++) 817 if (body[i]->loop_father == loop) 818 { 819 remove_bb_from_loops (body[i]); 820 add_bb_to_loop (body[i], loop_outer (loop)); 821 } 822 free(body); 823 824 while (loop->inner) 825 { 826 ploop = loop->inner; 827 flow_loop_tree_node_remove (ploop); 828 flow_loop_tree_node_add (loop_outer (loop), ploop); 829 } 830 831 /* Remove the loop and free its data. */ 832 delete_loop (loop); 833 834 remove_edge (single_succ_edge (latch)); 835 836 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if 837 there is an irreducible region inside the cancelled loop, the flags will 838 be still correct. */ 839 fix_bb_placements (latch, &dummy); 840 } 841 842 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that 843 condition stated in description of fix_loop_placement holds for them. 844 It is used in case when we removed some edges coming out of LOOP, which 845 may cause the right placement of LOOP inside loop tree to change. 846 847 IRRED_INVALIDATED is set to true if a change in the loop structures might 848 invalidate the information about irreducible regions. */ 849 850 static void 851 fix_loop_placements (struct loop *loop, bool *irred_invalidated) 852 { 853 struct loop *outer; 854 855 while (loop_outer (loop)) 856 { 857 outer = loop_outer (loop); 858 if (!fix_loop_placement (loop)) 859 break; 860 861 /* Changing the placement of a loop in the loop tree may alter the 862 validity of condition 2) of the description of fix_bb_placement 863 for its preheader, because the successor is the header and belongs 864 to the loop. So call fix_bb_placements to fix up the placement 865 of the preheader and (possibly) of its predecessors. */ 866 fix_bb_placements (loop_preheader_edge (loop)->src, 867 irred_invalidated); 868 loop = outer; 869 } 870 } 871 872 /* Copies copy of LOOP as subloop of TARGET loop, placing newly 873 created loop into loops structure. */ 874 struct loop * 875 duplicate_loop (struct loop *loop, struct loop *target) 876 { 877 struct loop *cloop; 878 cloop = alloc_loop (); 879 place_new_loop (cloop); 880 881 /* Mark the new loop as copy of LOOP. */ 882 set_loop_copy (loop, cloop); 883 884 /* Add it to target. */ 885 flow_loop_tree_node_add (target, cloop); 886 887 return cloop; 888 } 889 890 /* Copies structure of subloops of LOOP into TARGET loop, placing 891 newly created loops into loop tree. */ 892 void 893 duplicate_subloops (struct loop *loop, struct loop *target) 894 { 895 struct loop *aloop, *cloop; 896 897 for (aloop = loop->inner; aloop; aloop = aloop->next) 898 { 899 cloop = duplicate_loop (aloop, target); 900 duplicate_subloops (aloop, cloop); 901 } 902 } 903 904 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS, 905 into TARGET loop, placing newly created loops into loop tree. */ 906 static void 907 copy_loops_to (struct loop **copied_loops, int n, struct loop *target) 908 { 909 struct loop *aloop; 910 int i; 911 912 for (i = 0; i < n; i++) 913 { 914 aloop = duplicate_loop (copied_loops[i], target); 915 duplicate_subloops (copied_loops[i], aloop); 916 } 917 } 918 919 /* Redirects edge E to basic block DEST. */ 920 static void 921 loop_redirect_edge (edge e, basic_block dest) 922 { 923 if (e->dest == dest) 924 return; 925 926 redirect_edge_and_branch_force (e, dest); 927 } 928 929 /* Check whether LOOP's body can be duplicated. */ 930 bool 931 can_duplicate_loop_p (const struct loop *loop) 932 { 933 int ret; 934 basic_block *bbs = get_loop_body (loop); 935 936 ret = can_copy_bbs_p (bbs, loop->num_nodes); 937 free (bbs); 938 939 return ret; 940 } 941 942 /* Sets probability and count of edge E to zero. The probability and count 943 is redistributed evenly to the remaining edges coming from E->src. */ 944 945 static void 946 set_zero_probability (edge e) 947 { 948 basic_block bb = e->src; 949 edge_iterator ei; 950 edge ae, last = NULL; 951 unsigned n = EDGE_COUNT (bb->succs); 952 gcov_type cnt = e->count, cnt1; 953 unsigned prob = e->probability, prob1; 954 955 gcc_assert (n > 1); 956 cnt1 = cnt / (n - 1); 957 prob1 = prob / (n - 1); 958 959 FOR_EACH_EDGE (ae, ei, bb->succs) 960 { 961 if (ae == e) 962 continue; 963 964 ae->probability += prob1; 965 ae->count += cnt1; 966 last = ae; 967 } 968 969 /* Move the rest to one of the edges. */ 970 last->probability += prob % (n - 1); 971 last->count += cnt % (n - 1); 972 973 e->probability = 0; 974 e->count = 0; 975 } 976 977 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating 978 loop structure and dominators. E's destination must be LOOP header for 979 this to work, i.e. it must be entry or latch edge of this loop; these are 980 unique, as the loops must have preheaders for this function to work 981 correctly (in case E is latch, the function unrolls the loop, if E is entry 982 edge, it peels the loop). Store edges created by copying ORIG edge from 983 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to 984 original LOOP body, the other copies are numbered in order given by control 985 flow through them) into TO_REMOVE array. Returns false if duplication is 986 impossible. */ 987 988 bool 989 duplicate_loop_to_header_edge (struct loop *loop, edge e, 990 unsigned int ndupl, sbitmap wont_exit, 991 edge orig, VEC (edge, heap) **to_remove, 992 int flags) 993 { 994 struct loop *target, *aloop; 995 struct loop **orig_loops; 996 unsigned n_orig_loops; 997 basic_block header = loop->header, latch = loop->latch; 998 basic_block *new_bbs, *bbs, *first_active; 999 basic_block new_bb, bb, first_active_latch = NULL; 1000 edge ae, latch_edge; 1001 edge spec_edges[2], new_spec_edges[2]; 1002 #define SE_LATCH 0 1003 #define SE_ORIG 1 1004 unsigned i, j, n; 1005 int is_latch = (latch == e->src); 1006 int scale_act = 0, *scale_step = NULL, scale_main = 0; 1007 int scale_after_exit = 0; 1008 int p, freq_in, freq_le, freq_out_orig; 1009 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; 1010 int add_irreducible_flag; 1011 basic_block place_after; 1012 bitmap bbs_to_scale = NULL; 1013 bitmap_iterator bi; 1014 1015 gcc_assert (e->dest == loop->header); 1016 gcc_assert (ndupl > 0); 1017 1018 if (orig) 1019 { 1020 /* Orig must be edge out of the loop. */ 1021 gcc_assert (flow_bb_inside_loop_p (loop, orig->src)); 1022 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest)); 1023 } 1024 1025 n = loop->num_nodes; 1026 bbs = get_loop_body_in_dom_order (loop); 1027 gcc_assert (bbs[0] == loop->header); 1028 gcc_assert (bbs[n - 1] == loop->latch); 1029 1030 /* Check whether duplication is possible. */ 1031 if (!can_copy_bbs_p (bbs, loop->num_nodes)) 1032 { 1033 free (bbs); 1034 return false; 1035 } 1036 new_bbs = XNEWVEC (basic_block, loop->num_nodes); 1037 1038 /* In case we are doing loop peeling and the loop is in the middle of 1039 irreducible region, the peeled copies will be inside it too. */ 1040 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP; 1041 gcc_assert (!is_latch || !add_irreducible_flag); 1042 1043 /* Find edge from latch. */ 1044 latch_edge = loop_latch_edge (loop); 1045 1046 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1047 { 1048 /* Calculate coefficients by that we have to scale frequencies 1049 of duplicated loop bodies. */ 1050 freq_in = header->frequency; 1051 freq_le = EDGE_FREQUENCY (latch_edge); 1052 if (freq_in == 0) 1053 freq_in = 1; 1054 if (freq_in < freq_le) 1055 freq_in = freq_le; 1056 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le; 1057 if (freq_out_orig > freq_in - freq_le) 1058 freq_out_orig = freq_in - freq_le; 1059 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in); 1060 prob_pass_wont_exit = 1061 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); 1062 1063 if (orig 1064 && REG_BR_PROB_BASE - orig->probability != 0) 1065 { 1066 /* The blocks that are dominated by a removed exit edge ORIG have 1067 frequencies scaled by this. */ 1068 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, 1069 REG_BR_PROB_BASE - orig->probability); 1070 bbs_to_scale = BITMAP_ALLOC (NULL); 1071 for (i = 0; i < n; i++) 1072 { 1073 if (bbs[i] != orig->src 1074 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) 1075 bitmap_set_bit (bbs_to_scale, i); 1076 } 1077 } 1078 1079 scale_step = XNEWVEC (int, ndupl); 1080 1081 for (i = 1; i <= ndupl; i++) 1082 scale_step[i - 1] = TEST_BIT (wont_exit, i) 1083 ? prob_pass_wont_exit 1084 : prob_pass_thru; 1085 1086 /* Complete peeling is special as the probability of exit in last 1087 copy becomes 1. */ 1088 if (flags & DLTHE_FLAG_COMPLETTE_PEEL) 1089 { 1090 int wanted_freq = EDGE_FREQUENCY (e); 1091 1092 if (wanted_freq > freq_in) 1093 wanted_freq = freq_in; 1094 1095 gcc_assert (!is_latch); 1096 /* First copy has frequency of incoming edge. Each subsequent 1097 frequency should be reduced by prob_pass_wont_exit. Caller 1098 should've managed the flags so all except for original loop 1099 has won't exist set. */ 1100 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in); 1101 /* Now simulate the duplication adjustments and compute header 1102 frequency of the last copy. */ 1103 for (i = 0; i < ndupl; i++) 1104 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE); 1105 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in); 1106 } 1107 else if (is_latch) 1108 { 1109 prob_pass_main = TEST_BIT (wont_exit, 0) 1110 ? prob_pass_wont_exit 1111 : prob_pass_thru; 1112 p = prob_pass_main; 1113 scale_main = REG_BR_PROB_BASE; 1114 for (i = 0; i < ndupl; i++) 1115 { 1116 scale_main += p; 1117 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE); 1118 } 1119 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main); 1120 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE); 1121 } 1122 else 1123 { 1124 scale_main = REG_BR_PROB_BASE; 1125 for (i = 0; i < ndupl; i++) 1126 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE); 1127 scale_act = REG_BR_PROB_BASE - prob_pass_thru; 1128 } 1129 for (i = 0; i < ndupl; i++) 1130 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE); 1131 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE 1132 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE); 1133 } 1134 1135 /* Loop the new bbs will belong to. */ 1136 target = e->src->loop_father; 1137 1138 /* Original loops. */ 1139 n_orig_loops = 0; 1140 for (aloop = loop->inner; aloop; aloop = aloop->next) 1141 n_orig_loops++; 1142 orig_loops = XCNEWVEC (struct loop *, n_orig_loops); 1143 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++) 1144 orig_loops[i] = aloop; 1145 1146 set_loop_copy (loop, target); 1147 1148 first_active = XNEWVEC (basic_block, n); 1149 if (is_latch) 1150 { 1151 memcpy (first_active, bbs, n * sizeof (basic_block)); 1152 first_active_latch = latch; 1153 } 1154 1155 spec_edges[SE_ORIG] = orig; 1156 spec_edges[SE_LATCH] = latch_edge; 1157 1158 place_after = e->src; 1159 for (j = 0; j < ndupl; j++) 1160 { 1161 /* Copy loops. */ 1162 copy_loops_to (orig_loops, n_orig_loops, target); 1163 1164 /* Copy bbs. */ 1165 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop, 1166 place_after); 1167 place_after = new_spec_edges[SE_LATCH]->src; 1168 1169 if (flags & DLTHE_RECORD_COPY_NUMBER) 1170 for (i = 0; i < n; i++) 1171 { 1172 gcc_assert (!new_bbs[i]->aux); 1173 new_bbs[i]->aux = (void *)(size_t)(j + 1); 1174 } 1175 1176 /* Note whether the blocks and edges belong to an irreducible loop. */ 1177 if (add_irreducible_flag) 1178 { 1179 for (i = 0; i < n; i++) 1180 new_bbs[i]->flags |= BB_DUPLICATED; 1181 for (i = 0; i < n; i++) 1182 { 1183 edge_iterator ei; 1184 new_bb = new_bbs[i]; 1185 if (new_bb->loop_father == target) 1186 new_bb->flags |= BB_IRREDUCIBLE_LOOP; 1187 1188 FOR_EACH_EDGE (ae, ei, new_bb->succs) 1189 if ((ae->dest->flags & BB_DUPLICATED) 1190 && (ae->src->loop_father == target 1191 || ae->dest->loop_father == target)) 1192 ae->flags |= EDGE_IRREDUCIBLE_LOOP; 1193 } 1194 for (i = 0; i < n; i++) 1195 new_bbs[i]->flags &= ~BB_DUPLICATED; 1196 } 1197 1198 /* Redirect the special edges. */ 1199 if (is_latch) 1200 { 1201 redirect_edge_and_branch_force (latch_edge, new_bbs[0]); 1202 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], 1203 loop->header); 1204 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch); 1205 latch = loop->latch = new_bbs[n - 1]; 1206 e = latch_edge = new_spec_edges[SE_LATCH]; 1207 } 1208 else 1209 { 1210 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH], 1211 loop->header); 1212 redirect_edge_and_branch_force (e, new_bbs[0]); 1213 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src); 1214 e = new_spec_edges[SE_LATCH]; 1215 } 1216 1217 /* Record exit edge in this copy. */ 1218 if (orig && TEST_BIT (wont_exit, j + 1)) 1219 { 1220 if (to_remove) 1221 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]); 1222 set_zero_probability (new_spec_edges[SE_ORIG]); 1223 1224 /* Scale the frequencies of the blocks dominated by the exit. */ 1225 if (bbs_to_scale) 1226 { 1227 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) 1228 { 1229 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, 1230 REG_BR_PROB_BASE); 1231 } 1232 } 1233 } 1234 1235 /* Record the first copy in the control flow order if it is not 1236 the original loop (i.e. in case of peeling). */ 1237 if (!first_active_latch) 1238 { 1239 memcpy (first_active, new_bbs, n * sizeof (basic_block)); 1240 first_active_latch = new_bbs[n - 1]; 1241 } 1242 1243 /* Set counts and frequencies. */ 1244 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1245 { 1246 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE); 1247 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE); 1248 } 1249 } 1250 free (new_bbs); 1251 free (orig_loops); 1252 1253 /* Record the exit edge in the original loop body, and update the frequencies. */ 1254 if (orig && TEST_BIT (wont_exit, 0)) 1255 { 1256 if (to_remove) 1257 VEC_safe_push (edge, heap, *to_remove, orig); 1258 set_zero_probability (orig); 1259 1260 /* Scale the frequencies of the blocks dominated by the exit. */ 1261 if (bbs_to_scale) 1262 { 1263 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) 1264 { 1265 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, 1266 REG_BR_PROB_BASE); 1267 } 1268 } 1269 } 1270 1271 /* Update the original loop. */ 1272 if (!is_latch) 1273 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); 1274 if (flags & DLTHE_FLAG_UPDATE_FREQ) 1275 { 1276 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE); 1277 free (scale_step); 1278 } 1279 1280 /* Update dominators of outer blocks if affected. */ 1281 for (i = 0; i < n; i++) 1282 { 1283 basic_block dominated, dom_bb; 1284 VEC (basic_block, heap) *dom_bbs; 1285 unsigned j; 1286 1287 bb = bbs[i]; 1288 bb->aux = 0; 1289 1290 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); 1291 FOR_EACH_VEC_ELT (basic_block, dom_bbs, j, dominated) 1292 { 1293 if (flow_bb_inside_loop_p (loop, dominated)) 1294 continue; 1295 dom_bb = nearest_common_dominator ( 1296 CDI_DOMINATORS, first_active[i], first_active_latch); 1297 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb); 1298 } 1299 VEC_free (basic_block, heap, dom_bbs); 1300 } 1301 free (first_active); 1302 1303 free (bbs); 1304 BITMAP_FREE (bbs_to_scale); 1305 1306 return true; 1307 } 1308 1309 /* A callback for make_forwarder block, to redirect all edges except for 1310 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide 1311 whether to redirect it. */ 1312 1313 edge mfb_kj_edge; 1314 bool 1315 mfb_keep_just (edge e) 1316 { 1317 return e != mfb_kj_edge; 1318 } 1319 1320 /* True when a candidate preheader BLOCK has predecessors from LOOP. */ 1321 1322 static bool 1323 has_preds_from_loop (basic_block block, struct loop *loop) 1324 { 1325 edge e; 1326 edge_iterator ei; 1327 1328 FOR_EACH_EDGE (e, ei, block->preds) 1329 if (e->src->loop_father == loop) 1330 return true; 1331 return false; 1332 } 1333 1334 /* Creates a pre-header for a LOOP. Returns newly created block. Unless 1335 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single 1336 entry; otherwise we also force preheader block to have only one successor. 1337 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block 1338 to be a fallthru predecessor to the loop header and to have only 1339 predecessors from outside of the loop. 1340 The function also updates dominators. */ 1341 1342 basic_block 1343 create_preheader (struct loop *loop, int flags) 1344 { 1345 edge e, fallthru; 1346 basic_block dummy; 1347 int nentry = 0; 1348 bool irred = false; 1349 bool latch_edge_was_fallthru; 1350 edge one_succ_pred = NULL, single_entry = NULL; 1351 edge_iterator ei; 1352 1353 FOR_EACH_EDGE (e, ei, loop->header->preds) 1354 { 1355 if (e->src == loop->latch) 1356 continue; 1357 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0; 1358 nentry++; 1359 single_entry = e; 1360 if (single_succ_p (e->src)) 1361 one_succ_pred = e; 1362 } 1363 gcc_assert (nentry); 1364 if (nentry == 1) 1365 { 1366 bool need_forwarder_block = false; 1367 1368 /* We do not allow entry block to be the loop preheader, since we 1369 cannot emit code there. */ 1370 if (single_entry->src == ENTRY_BLOCK_PTR) 1371 need_forwarder_block = true; 1372 else 1373 { 1374 /* If we want simple preheaders, also force the preheader to have 1375 just a single successor. */ 1376 if ((flags & CP_SIMPLE_PREHEADERS) 1377 && !single_succ_p (single_entry->src)) 1378 need_forwarder_block = true; 1379 /* If we want fallthru preheaders, also create forwarder block when 1380 preheader ends with a jump or has predecessors from loop. */ 1381 else if ((flags & CP_FALLTHRU_PREHEADERS) 1382 && (JUMP_P (BB_END (single_entry->src)) 1383 || has_preds_from_loop (single_entry->src, loop))) 1384 need_forwarder_block = true; 1385 } 1386 if (! need_forwarder_block) 1387 return NULL; 1388 } 1389 1390 mfb_kj_edge = loop_latch_edge (loop); 1391 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0; 1392 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL); 1393 dummy = fallthru->src; 1394 loop->header = fallthru->dest; 1395 1396 /* Try to be clever in placing the newly created preheader. The idea is to 1397 avoid breaking any "fallthruness" relationship between blocks. 1398 1399 The preheader was created just before the header and all incoming edges 1400 to the header were redirected to the preheader, except the latch edge. 1401 So the only problematic case is when this latch edge was a fallthru 1402 edge: it is not anymore after the preheader creation so we have broken 1403 the fallthruness. We're therefore going to look for a better place. */ 1404 if (latch_edge_was_fallthru) 1405 { 1406 if (one_succ_pred) 1407 e = one_succ_pred; 1408 else 1409 e = EDGE_PRED (dummy, 0); 1410 1411 move_block_after (dummy, e->src); 1412 } 1413 1414 if (irred) 1415 { 1416 dummy->flags |= BB_IRREDUCIBLE_LOOP; 1417 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP; 1418 } 1419 1420 if (dump_file) 1421 fprintf (dump_file, "Created preheader block for loop %i\n", 1422 loop->num); 1423 1424 if (flags & CP_FALLTHRU_PREHEADERS) 1425 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU) 1426 && !JUMP_P (BB_END (dummy))); 1427 1428 return dummy; 1429 } 1430 1431 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */ 1432 1433 void 1434 create_preheaders (int flags) 1435 { 1436 loop_iterator li; 1437 struct loop *loop; 1438 1439 if (!current_loops) 1440 return; 1441 1442 FOR_EACH_LOOP (li, loop, 0) 1443 create_preheader (loop, flags); 1444 loops_state_set (LOOPS_HAVE_PREHEADERS); 1445 } 1446 1447 /* Forces all loop latches to have only single successor. */ 1448 1449 void 1450 force_single_succ_latches (void) 1451 { 1452 loop_iterator li; 1453 struct loop *loop; 1454 edge e; 1455 1456 FOR_EACH_LOOP (li, loop, 0) 1457 { 1458 if (loop->latch != loop->header && single_succ_p (loop->latch)) 1459 continue; 1460 1461 e = find_edge (loop->latch, loop->header); 1462 1463 split_edge (e); 1464 } 1465 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES); 1466 } 1467 1468 /* This function is called from loop_version. It splits the entry edge 1469 of the loop we want to version, adds the versioning condition, and 1470 adjust the edges to the two versions of the loop appropriately. 1471 e is an incoming edge. Returns the basic block containing the 1472 condition. 1473 1474 --- edge e ---- > [second_head] 1475 1476 Split it and insert new conditional expression and adjust edges. 1477 1478 --- edge e ---> [cond expr] ---> [first_head] 1479 | 1480 +---------> [second_head] 1481 1482 THEN_PROB is the probability of then branch of the condition. */ 1483 1484 static basic_block 1485 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head, 1486 edge e, void *cond_expr, unsigned then_prob) 1487 { 1488 basic_block new_head = NULL; 1489 edge e1; 1490 1491 gcc_assert (e->dest == second_head); 1492 1493 /* Split edge 'e'. This will create a new basic block, where we can 1494 insert conditional expr. */ 1495 new_head = split_edge (e); 1496 1497 lv_add_condition_to_bb (first_head, second_head, new_head, 1498 cond_expr); 1499 1500 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */ 1501 e = single_succ_edge (new_head); 1502 e1 = make_edge (new_head, first_head, 1503 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0); 1504 e1->probability = then_prob; 1505 e->probability = REG_BR_PROB_BASE - then_prob; 1506 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE); 1507 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE); 1508 1509 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head); 1510 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head); 1511 1512 /* Adjust loop header phi nodes. */ 1513 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1); 1514 1515 return new_head; 1516 } 1517 1518 /* Main entry point for Loop Versioning transformation. 1519 1520 This transformation given a condition and a loop, creates 1521 -if (condition) { loop_copy1 } else { loop_copy2 }, 1522 where loop_copy1 is the loop transformed in one way, and loop_copy2 1523 is the loop transformed in another way (or unchanged). 'condition' 1524 may be a run time test for things that were not resolved by static 1525 analysis (overlapping ranges (anti-aliasing), alignment, etc.). 1526 1527 THEN_PROB is the probability of the then edge of the if. THEN_SCALE 1528 is the ratio by that the frequencies in the original loop should 1529 be scaled. ELSE_SCALE is the ratio by that the frequencies in the 1530 new loop should be scaled. 1531 1532 If PLACE_AFTER is true, we place the new loop after LOOP in the 1533 instruction stream, otherwise it is placed before LOOP. */ 1534 1535 struct loop * 1536 loop_version (struct loop *loop, 1537 void *cond_expr, basic_block *condition_bb, 1538 unsigned then_prob, unsigned then_scale, unsigned else_scale, 1539 bool place_after) 1540 { 1541 basic_block first_head, second_head; 1542 edge entry, latch_edge, true_edge, false_edge; 1543 int irred_flag; 1544 struct loop *nloop; 1545 basic_block cond_bb; 1546 1547 /* Record entry and latch edges for the loop */ 1548 entry = loop_preheader_edge (loop); 1549 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; 1550 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; 1551 1552 /* Note down head of loop as first_head. */ 1553 first_head = entry->dest; 1554 1555 /* Duplicate loop. */ 1556 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1, 1557 NULL, NULL, NULL, 0)) 1558 { 1559 entry->flags |= irred_flag; 1560 return NULL; 1561 } 1562 1563 /* After duplication entry edge now points to new loop head block. 1564 Note down new head as second_head. */ 1565 second_head = entry->dest; 1566 1567 /* Split loop entry edge and insert new block with cond expr. */ 1568 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head, 1569 entry, cond_expr, then_prob); 1570 if (condition_bb) 1571 *condition_bb = cond_bb; 1572 1573 if (!cond_bb) 1574 { 1575 entry->flags |= irred_flag; 1576 return NULL; 1577 } 1578 1579 latch_edge = single_succ_edge (get_bb_copy (loop->latch)); 1580 1581 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge); 1582 nloop = loopify (latch_edge, 1583 single_pred_edge (get_bb_copy (loop->header)), 1584 cond_bb, true_edge, false_edge, 1585 false /* Do not redirect all edges. */, 1586 then_scale, else_scale); 1587 1588 /* loopify redirected latch_edge. Update its PENDING_STMTS. */ 1589 lv_flush_pending_stmts (latch_edge); 1590 1591 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */ 1592 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge); 1593 lv_flush_pending_stmts (false_edge); 1594 /* Adjust irreducible flag. */ 1595 if (irred_flag) 1596 { 1597 cond_bb->flags |= BB_IRREDUCIBLE_LOOP; 1598 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP; 1599 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP; 1600 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP; 1601 } 1602 1603 if (place_after) 1604 { 1605 basic_block *bbs = get_loop_body_in_dom_order (nloop), after; 1606 unsigned i; 1607 1608 after = loop->latch; 1609 1610 for (i = 0; i < nloop->num_nodes; i++) 1611 { 1612 move_block_after (bbs[i], after); 1613 after = bbs[i]; 1614 } 1615 free (bbs); 1616 } 1617 1618 /* At this point condition_bb is loop preheader with two successors, 1619 first_head and second_head. Make sure that loop preheader has only 1620 one successor. */ 1621 split_edge (loop_preheader_edge (loop)); 1622 split_edge (loop_preheader_edge (nloop)); 1623 1624 return nloop; 1625 } 1626 1627 /* The structure of loops might have changed. Some loops might get removed 1628 (and their headers and latches were set to NULL), loop exists might get 1629 removed (thus the loop nesting may be wrong), and some blocks and edges 1630 were changed (so the information about bb --> loop mapping does not have 1631 to be correct). But still for the remaining loops the header dominates 1632 the latch, and loops did not get new subloops (new loops might possibly 1633 get created, but we are not interested in them). Fix up the mess. 1634 1635 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are 1636 marked in it. */ 1637 1638 void 1639 fix_loop_structure (bitmap changed_bbs) 1640 { 1641 basic_block bb; 1642 struct loop *loop, *ploop; 1643 loop_iterator li; 1644 bool record_exits = false; 1645 struct loop **superloop = XNEWVEC (struct loop *, number_of_loops ()); 1646 1647 /* Remove the old bb -> loop mapping. Remember the depth of the blocks in 1648 the loop hierarchy, so that we can recognize blocks whose loop nesting 1649 relationship has changed. */ 1650 FOR_EACH_BB (bb) 1651 { 1652 if (changed_bbs) 1653 bb->aux = (void *) (size_t) loop_depth (bb->loop_father); 1654 bb->loop_father = current_loops->tree_root; 1655 } 1656 1657 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS)) 1658 { 1659 release_recorded_exits (); 1660 record_exits = true; 1661 } 1662 1663 /* Remove the dead loops from structures. We start from the innermost 1664 loops, so that when we remove the loops, we know that the loops inside 1665 are preserved, and do not waste time relinking loops that will be 1666 removed later. */ 1667 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) 1668 { 1669 if (loop->header) 1670 continue; 1671 1672 while (loop->inner) 1673 { 1674 ploop = loop->inner; 1675 flow_loop_tree_node_remove (ploop); 1676 flow_loop_tree_node_add (loop_outer (loop), ploop); 1677 } 1678 1679 /* Remove the loop and free its data. */ 1680 delete_loop (loop); 1681 } 1682 1683 /* Rescan the bodies of loops, starting from the outermost ones. We assume 1684 that no optimization interchanges the order of the loops, i.e., it cannot 1685 happen that L1 was superloop of L2 before and it is subloop of L2 now 1686 (without explicitly updating loop information). At the same time, we also 1687 determine the new loop structure. */ 1688 current_loops->tree_root->num_nodes = n_basic_blocks; 1689 FOR_EACH_LOOP (li, loop, 0) 1690 { 1691 superloop[loop->num] = loop->header->loop_father; 1692 loop->num_nodes = flow_loop_nodes_find (loop->header, loop); 1693 } 1694 1695 /* Now fix the loop nesting. */ 1696 FOR_EACH_LOOP (li, loop, 0) 1697 { 1698 ploop = superloop[loop->num]; 1699 if (ploop != loop_outer (loop)) 1700 { 1701 flow_loop_tree_node_remove (loop); 1702 flow_loop_tree_node_add (ploop, loop); 1703 } 1704 } 1705 free (superloop); 1706 1707 /* Mark the blocks whose loop has changed. */ 1708 if (changed_bbs) 1709 { 1710 FOR_EACH_BB (bb) 1711 { 1712 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux) 1713 bitmap_set_bit (changed_bbs, bb->index); 1714 1715 bb->aux = NULL; 1716 } 1717 } 1718 1719 if (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS)) 1720 create_preheaders (CP_SIMPLE_PREHEADERS); 1721 1722 if (loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES)) 1723 force_single_succ_latches (); 1724 1725 if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) 1726 mark_irreducible_loops (); 1727 1728 if (record_exits) 1729 record_loop_exits (); 1730 1731 #ifdef ENABLE_CHECKING 1732 verify_loop_structure (); 1733 #endif 1734 } 1735