1 /* Code to analyze doloop loops in order for targets to perform late 2 optimizations converting doloops to other forms of hardware loops. 3 Copyright (C) 2011 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tm.h" 25 #include "rtl.h" 26 #include "flags.h" 27 #include "expr.h" 28 #include "hard-reg-set.h" 29 #include "regs.h" 30 #include "basic-block.h" 31 #include "tm_p.h" 32 #include "df.h" 33 #include "cfglayout.h" 34 #include "cfgloop.h" 35 #include "output.h" 36 #include "recog.h" 37 #include "target.h" 38 #include "hw-doloop.h" 39 40 #ifdef HAVE_doloop_end 41 42 /* Dump information collected in LOOPS. */ 43 static void 44 dump_hwloops (hwloop_info loops) 45 { 46 hwloop_info loop; 47 48 for (loop = loops; loop; loop = loop->next) 49 { 50 hwloop_info i; 51 basic_block b; 52 unsigned ix; 53 54 fprintf (dump_file, ";; loop %d: ", loop->loop_no); 55 if (loop->bad) 56 fprintf (dump_file, "(bad) "); 57 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", 58 loop->head == NULL ? -1 : loop->head->index, 59 loop->depth, REGNO (loop->iter_reg)); 60 61 fprintf (dump_file, " blocks: [ "); 62 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++) 63 fprintf (dump_file, "%d ", b->index); 64 fprintf (dump_file, "] "); 65 66 fprintf (dump_file, " inner loops: [ "); 67 for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, i); ix++) 68 fprintf (dump_file, "%d ", i->loop_no); 69 fprintf (dump_file, "]\n"); 70 } 71 fprintf (dump_file, "\n"); 72 } 73 74 /* Return true if BB is part of LOOP. */ 75 static bool 76 bb_in_loop_p (hwloop_info loop, basic_block bb) 77 { 78 return bitmap_bit_p (loop->block_bitmap, bb->index); 79 } 80 81 /* Scan the blocks of LOOP (and its inferiors), and record things such as 82 hard registers set, jumps out of and within the loop. */ 83 static void 84 scan_loop (hwloop_info loop) 85 { 86 unsigned ix; 87 basic_block bb; 88 89 if (loop->bad) 90 return; 91 92 if (REGNO_REG_SET_P (df_get_live_in (loop->successor), 93 REGNO (loop->iter_reg))) 94 loop->iter_reg_used_outside = true; 95 96 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++) 97 { 98 rtx insn; 99 edge e; 100 edge_iterator ei; 101 102 if (bb != loop->tail) 103 FOR_EACH_EDGE (e, ei, bb->succs) 104 { 105 if (bb_in_loop_p (loop, e->dest)) 106 { 107 if (!(e->flags & EDGE_FALLTHRU)) 108 loop->jumps_within = true; 109 } 110 else 111 { 112 loop->jumps_outof = true; 113 if (!loop->bad) 114 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), 115 REGNO (loop->iter_reg))); 116 } 117 } 118 119 for (insn = BB_HEAD (bb); 120 insn != NEXT_INSN (BB_END (bb)); 121 insn = NEXT_INSN (insn)) 122 { 123 df_ref *def_rec; 124 HARD_REG_SET set_this_insn; 125 126 if (!NONDEBUG_INSN_P (insn)) 127 continue; 128 129 if (recog_memoized (insn) < 0 130 && (GET_CODE (PATTERN (insn)) == ASM_INPUT 131 || asm_noperands (PATTERN (insn)) >= 0)) 132 loop->has_asm = true; 133 134 CLEAR_HARD_REG_SET (set_this_insn); 135 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) 136 { 137 rtx dreg = DF_REF_REG (*def_rec); 138 139 if (!REG_P (dreg)) 140 continue; 141 142 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), 143 REGNO (dreg)); 144 } 145 146 if (insn == loop->loop_end) 147 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); 148 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) 149 loop->iter_reg_used = true; 150 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); 151 } 152 } 153 } 154 155 /* Compute the incoming_dest and incoming_src members of LOOP by 156 identifying the edges that jump into the loop. If there is more 157 than one block that jumps into the loop, incoming_src will be set 158 to NULL; likewise, if there is more than one block in the loop that 159 is the destination of an incoming edge, incoming_dest will be NULL. 160 161 Return true if either of these two fields is nonnull, false 162 otherwise. */ 163 static bool 164 process_incoming_edges (hwloop_info loop) 165 { 166 edge e; 167 edge_iterator ei; 168 bool first = true; 169 170 FOR_EACH_EDGE (e, ei, loop->incoming) 171 { 172 if (first) 173 { 174 loop->incoming_src = e->src; 175 loop->incoming_dest = e->dest; 176 first = false; 177 } 178 else 179 { 180 if (e->dest != loop->incoming_dest) 181 loop->incoming_dest = NULL; 182 if (e->src != loop->incoming_src) 183 loop->incoming_src = NULL; 184 } 185 } 186 if (loop->incoming_src == NULL && loop->incoming_dest == NULL) 187 return false; 188 189 return true; 190 } 191 192 /* Try to identify a forwarder block that jump into LOOP, and add it to 193 the set of blocks in the loop, updating the vector of incoming blocks as 194 well. This transformation gives a second chance to loops we couldn't 195 otherwise handle by increasing the chance that we'll end up with one 196 incoming_src block. 197 Return true if we made a change, false otherwise. */ 198 static bool 199 add_forwarder_blocks (hwloop_info loop) 200 { 201 edge e; 202 edge_iterator ei; 203 204 FOR_EACH_EDGE (e, ei, loop->incoming) 205 { 206 if (forwarder_block_p (e->src)) 207 { 208 edge e2; 209 edge_iterator ei2; 210 211 if (dump_file) 212 fprintf (dump_file, 213 ";; Adding forwarder block %d to loop %d and retrying\n", 214 e->src->index, loop->loop_no); 215 VEC_safe_push (basic_block, heap, loop->blocks, e->src); 216 bitmap_set_bit (loop->block_bitmap, e->src->index); 217 FOR_EACH_EDGE (e2, ei2, e->src->preds) 218 VEC_safe_push (edge, gc, loop->incoming, e2); 219 VEC_unordered_remove (edge, loop->incoming, ei.index); 220 return true; 221 } 222 } 223 return false; 224 } 225 226 /* Called from reorg_loops when a potential loop end is found. LOOP is 227 a newly set up structure describing the loop, it is this function's 228 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the 229 loop_end insn and its enclosing basic block. REG is the loop counter 230 register. 231 For our purposes, a loop is defined by the set of blocks reachable from 232 the loop head in which the loop counter register is live. This matches 233 the expected use; targets that call into this code usually replace the 234 loop counter with a different special register. */ 235 static void 236 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg) 237 { 238 bool found_tail; 239 unsigned dwork = 0; 240 basic_block bb; 241 VEC (basic_block,heap) *works; 242 243 loop->tail = tail_bb; 244 loop->loop_end = tail_insn; 245 loop->iter_reg = reg; 246 loop->incoming = VEC_alloc (edge, gc, 2); 247 loop->start_label = JUMP_LABEL (tail_insn); 248 249 if (EDGE_COUNT (tail_bb->succs) != 2) 250 { 251 loop->bad = true; 252 return; 253 } 254 loop->head = BRANCH_EDGE (tail_bb)->dest; 255 loop->successor = FALLTHRU_EDGE (tail_bb)->dest; 256 257 works = VEC_alloc (basic_block, heap, 20); 258 VEC_safe_push (basic_block, heap, works, loop->head); 259 260 found_tail = false; 261 for (dwork = 0; VEC_iterate (basic_block, works, dwork, bb); dwork++) 262 { 263 edge e; 264 edge_iterator ei; 265 if (bb == EXIT_BLOCK_PTR) 266 { 267 /* We've reached the exit block. The loop must be bad. */ 268 if (dump_file) 269 fprintf (dump_file, 270 ";; Loop is bad - reached exit block while scanning\n"); 271 loop->bad = true; 272 break; 273 } 274 275 if (bitmap_bit_p (loop->block_bitmap, bb->index)) 276 continue; 277 278 /* We've not seen this block before. Add it to the loop's 279 list and then add each successor to the work list. */ 280 281 VEC_safe_push (basic_block, heap, loop->blocks, bb); 282 bitmap_set_bit (loop->block_bitmap, bb->index); 283 284 if (bb == tail_bb) 285 found_tail = true; 286 else 287 { 288 FOR_EACH_EDGE (e, ei, bb->succs) 289 { 290 basic_block succ = EDGE_SUCC (bb, ei.index)->dest; 291 if (REGNO_REG_SET_P (df_get_live_in (succ), 292 REGNO (loop->iter_reg))) 293 VEC_safe_push (basic_block, heap, works, succ); 294 } 295 } 296 } 297 298 if (!found_tail) 299 loop->bad = true; 300 301 /* Find the predecessor, and make sure nothing else jumps into this loop. */ 302 if (!loop->bad) 303 { 304 FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb) 305 { 306 edge e; 307 edge_iterator ei; 308 FOR_EACH_EDGE (e, ei, bb->preds) 309 { 310 basic_block pred = e->src; 311 312 if (!bb_in_loop_p (loop, pred)) 313 { 314 if (dump_file) 315 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", 316 loop->loop_no, pred->index, 317 e->dest->index); 318 VEC_safe_push (edge, gc, loop->incoming, e); 319 } 320 } 321 } 322 323 if (!process_incoming_edges (loop)) 324 { 325 if (dump_file) 326 fprintf (dump_file, 327 ";; retrying loop %d with forwarder blocks\n", 328 loop->loop_no); 329 if (!add_forwarder_blocks (loop)) 330 { 331 if (dump_file) 332 fprintf (dump_file, ";; No forwarder blocks found\n"); 333 loop->bad = true; 334 } 335 else if (!process_incoming_edges (loop)) 336 { 337 if (dump_file) 338 fprintf (dump_file, 339 ";; can't find suitable entry for loop %d\n", 340 loop->loop_no); 341 } 342 } 343 } 344 345 VEC_free (basic_block, heap, works); 346 } 347 348 /* Analyze the structure of the loops in the current function. Use 349 STACK for bitmap allocations. Returns all the valid candidates for 350 hardware loops found in this function. HOOKS is the argument 351 passed to reorg_loops, used here to find the iteration registers 352 from a loop_end pattern. */ 353 static hwloop_info 354 discover_loops (bitmap_obstack *stack, struct hw_doloop_hooks *hooks) 355 { 356 hwloop_info loops = NULL; 357 hwloop_info loop; 358 basic_block bb; 359 int nloops = 0; 360 361 /* Find all the possible loop tails. This means searching for every 362 loop_end instruction. For each one found, create a hwloop_info 363 structure and add the head block to the work list. */ 364 FOR_EACH_BB (bb) 365 { 366 rtx tail = BB_END (bb); 367 rtx insn, reg; 368 369 while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb)) 370 tail = PREV_INSN (tail); 371 372 if (tail == NULL_RTX) 373 continue; 374 375 if (!JUMP_P (tail)) 376 continue; 377 reg = hooks->end_pattern_reg (tail); 378 if (reg == NULL_RTX) 379 continue; 380 381 /* A possible loop end */ 382 383 /* There's a degenerate case we can handle - an empty loop consisting 384 of only a back branch. Handle that by deleting the branch. */ 385 insn = JUMP_LABEL (tail); 386 while (insn && !NONDEBUG_INSN_P (insn)) 387 insn = NEXT_INSN (insn); 388 if (insn == tail) 389 { 390 basic_block succ = FALLTHRU_EDGE (bb)->dest; 391 if (dump_file) 392 { 393 fprintf (dump_file, ";; degenerate loop ending at\n"); 394 print_rtl_single (dump_file, tail); 395 } 396 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) 397 { 398 if (dump_file) 399 fprintf (dump_file, ";; deleting it\n"); 400 delete_insn_and_edges (tail); 401 continue; 402 } 403 } 404 405 loop = XCNEW (struct hwloop_info_d); 406 loop->next = loops; 407 loops = loop; 408 loop->loop_no = nloops++; 409 loop->blocks = VEC_alloc (basic_block, heap, 20); 410 loop->block_bitmap = BITMAP_ALLOC (stack); 411 412 if (dump_file) 413 { 414 fprintf (dump_file, ";; potential loop %d ending at\n", 415 loop->loop_no); 416 print_rtl_single (dump_file, tail); 417 } 418 419 discover_loop (loop, bb, tail, reg); 420 } 421 422 /* Compute loop nestings. Given two loops A and B, either the sets 423 of their blocks don't intersect at all, or one is the subset of 424 the other, or the blocks don't form a good nesting structure. */ 425 for (loop = loops; loop; loop = loop->next) 426 { 427 hwloop_info other; 428 429 if (loop->bad) 430 continue; 431 432 for (other = loops; other; other = other->next) 433 { 434 if (other->bad) 435 continue; 436 437 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) 438 continue; 439 if (!bitmap_intersect_compl_p (other->block_bitmap, 440 loop->block_bitmap)) 441 VEC_safe_push (hwloop_info, heap, loop->loops, other); 442 else if (!bitmap_intersect_compl_p (loop->block_bitmap, 443 other->block_bitmap)) 444 VEC_safe_push (hwloop_info, heap, other->loops, loop); 445 else 446 { 447 if (dump_file) 448 fprintf (dump_file, 449 ";; can't find suitable nesting for loops %d and %d\n", 450 loop->loop_no, other->loop_no); 451 loop->bad = other->bad = true; 452 } 453 } 454 } 455 456 if (dump_file) 457 dump_hwloops (loops); 458 459 return loops; 460 } 461 462 /* Free up the loop structures in LOOPS. */ 463 static void 464 free_loops (hwloop_info loops) 465 { 466 while (loops) 467 { 468 hwloop_info loop = loops; 469 loops = loop->next; 470 VEC_free (hwloop_info, heap, loop->loops); 471 VEC_free (basic_block, heap, loop->blocks); 472 BITMAP_FREE (loop->block_bitmap); 473 XDELETE (loop); 474 } 475 } 476 477 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) 478 479 /* Initialize the aux fields to give ascending indices to basic blocks. */ 480 static void 481 set_bb_indices (void) 482 { 483 basic_block bb; 484 intptr_t index; 485 486 index = 0; 487 FOR_EACH_BB (bb) 488 bb->aux = (void *) index++; 489 } 490 491 /* The taken-branch edge from the loop end can actually go forward. 492 If the target's hardware loop support requires that the loop end be 493 after the loop start, try to reorder a loop's basic blocks when we 494 find such a case. 495 This is not very aggressive; it only moves at most one block. It 496 does not introduce new branches into loops; it may remove them, or 497 it may switch fallthru/jump edges. */ 498 static void 499 reorder_loops (hwloop_info loops) 500 { 501 basic_block bb; 502 hwloop_info loop; 503 504 cfg_layout_initialize (0); 505 506 set_bb_indices (); 507 508 for (loop = loops; loop; loop = loop->next) 509 { 510 edge e; 511 edge_iterator ei; 512 513 if (loop->bad) 514 continue; 515 516 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) 517 continue; 518 519 FOR_EACH_EDGE (e, ei, loop->head->succs) 520 { 521 if (bitmap_bit_p (loop->block_bitmap, e->dest->index) 522 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) 523 { 524 basic_block start_bb = e->dest; 525 basic_block start_prev_bb = start_bb->prev_bb; 526 527 if (dump_file) 528 fprintf (dump_file, ";; Moving block %d before block %d\n", 529 loop->head->index, start_bb->index); 530 loop->head->prev_bb->next_bb = loop->head->next_bb; 531 loop->head->next_bb->prev_bb = loop->head->prev_bb; 532 533 loop->head->prev_bb = start_prev_bb; 534 loop->head->next_bb = start_bb; 535 start_prev_bb->next_bb = start_bb->prev_bb = loop->head; 536 537 set_bb_indices (); 538 break; 539 } 540 } 541 loops = loops->next; 542 } 543 544 FOR_EACH_BB (bb) 545 { 546 if (bb->next_bb != EXIT_BLOCK_PTR) 547 bb->aux = bb->next_bb; 548 else 549 bb->aux = NULL; 550 } 551 cfg_layout_finalize (); 552 clear_aux_for_blocks (); 553 df_analyze (); 554 } 555 556 /* Call the OPT function for LOOP and all of its sub-loops. This is 557 done in a depth-first search; innermost loops are visited first. 558 OPTIMIZE and FAIL are the functions passed to reorg_loops by the 559 target's reorg pass. */ 560 static void 561 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) 562 { 563 int ix; 564 hwloop_info inner; 565 int inner_depth = 0; 566 567 if (loop->visited) 568 return; 569 570 loop->visited = 1; 571 572 if (loop->bad) 573 { 574 if (dump_file) 575 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); 576 goto bad_loop; 577 } 578 579 /* Every loop contains in its list of inner loops every loop nested inside 580 it, even if there are intermediate loops. This works because we're doing 581 a depth-first search here and never visit a loop more than once. 582 Recursion depth is effectively limited by the number of available 583 hardware registers. */ 584 for (ix = 0; VEC_iterate (hwloop_info, loop->loops, ix, inner); ix++) 585 { 586 optimize_loop (inner, hooks); 587 588 if (!inner->bad && inner_depth < inner->depth) 589 inner_depth = inner->depth; 590 /* The set of registers may be changed while optimizing the inner 591 loop. */ 592 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); 593 } 594 595 loop->depth = inner_depth + 1; 596 597 if (hooks->opt (loop)) 598 return; 599 600 bad_loop: 601 if (dump_file) 602 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); 603 604 loop->bad = true; 605 hooks->fail (loop); 606 } 607 608 /* This function can be used from a port's machine_dependent_reorg to 609 find and analyze loops that end in loop_end instructions. It uses 610 a set of function pointers in HOOKS to call back into the 611 target-specific functions to perform the actual machine-specific 612 transformations. 613 614 Such transformations typically involve additional set-up 615 instructions before the loop, to define loop bounds or set up a 616 special loop counter register. 617 618 DO_REORDER should be set to true if we should try to use the 619 reorder_loops function to ensure the loop end occurs after the loop 620 start. This is for use by targets where the loop hardware requires 621 this condition. 622 623 HOOKS is used to pass in target specific hooks; see 624 hw-doloop.h. */ 625 void 626 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) 627 { 628 hwloop_info loops = NULL; 629 hwloop_info loop; 630 bitmap_obstack stack; 631 632 df_live_add_problem (); 633 df_live_set_all_dirty (); 634 df_analyze (); 635 636 bitmap_obstack_initialize (&stack); 637 638 if (dump_file) 639 fprintf (dump_file, ";; Find loops, first pass\n\n"); 640 641 loops = discover_loops (&stack, hooks); 642 643 if (do_reorder) 644 { 645 reorder_loops (loops); 646 free_loops (loops); 647 648 if (dump_file) 649 fprintf (dump_file, ";; Find loops, second pass\n\n"); 650 651 loops = discover_loops (&stack, hooks); 652 } 653 654 for (loop = loops; loop; loop = loop->next) 655 scan_loop (loop); 656 657 /* Now apply the optimizations. */ 658 for (loop = loops; loop; loop = loop->next) 659 optimize_loop (loop, hooks); 660 661 if (dump_file) 662 { 663 fprintf (dump_file, ";; After hardware loops optimization:\n\n"); 664 dump_hwloops (loops); 665 } 666 667 free_loops (loops); 668 669 if (dump_file) 670 print_rtl (dump_file, get_insns ()); 671 } 672 #endif 673