1 /* Code to analyze doloop loops in order for targets to perform late 2 optimizations converting doloops to other forms of hardware loops. 3 Copyright (C) 2011-2018 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "backend.h" 25 #include "rtl.h" 26 #include "df.h" 27 #include "insn-config.h" 28 #include "regs.h" 29 #include "memmodel.h" 30 #include "emit-rtl.h" 31 #include "recog.h" 32 #include "cfgrtl.h" 33 #include "hw-doloop.h" 34 #include "dumpfile.h" 35 36 /* Dump information collected in LOOPS. */ 37 static void 38 dump_hwloops (hwloop_info loops) 39 { 40 hwloop_info loop; 41 42 for (loop = loops; loop; loop = loop->next) 43 { 44 hwloop_info i; 45 basic_block b; 46 unsigned ix; 47 48 fprintf (dump_file, ";; loop %d: ", loop->loop_no); 49 if (loop->bad) 50 fprintf (dump_file, "(bad) "); 51 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", 52 loop->head == NULL ? -1 : loop->head->index, 53 loop->depth, REGNO (loop->iter_reg)); 54 55 fprintf (dump_file, " blocks: [ "); 56 for (ix = 0; loop->blocks.iterate (ix, &b); ix++) 57 fprintf (dump_file, "%d ", b->index); 58 fprintf (dump_file, "] "); 59 60 fprintf (dump_file, " inner loops: [ "); 61 for (ix = 0; loop->loops.iterate (ix, &i); ix++) 62 fprintf (dump_file, "%d ", i->loop_no); 63 fprintf (dump_file, "]\n"); 64 } 65 fprintf (dump_file, "\n"); 66 } 67 68 /* Return true if BB is part of LOOP. */ 69 static bool 70 bb_in_loop_p (hwloop_info loop, basic_block bb) 71 { 72 return bitmap_bit_p (loop->block_bitmap, bb->index); 73 } 74 75 /* Scan the blocks of LOOP (and its inferiors), and record things such as 76 hard registers set, jumps out of and within the loop. */ 77 static void 78 scan_loop (hwloop_info loop) 79 { 80 unsigned ix; 81 basic_block bb; 82 83 if (loop->bad) 84 return; 85 86 if (REGNO_REG_SET_P (df_get_live_in (loop->successor), 87 REGNO (loop->iter_reg))) 88 loop->iter_reg_used_outside = true; 89 90 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) 91 { 92 rtx_insn *insn; 93 edge e; 94 edge_iterator ei; 95 96 if (bb != loop->tail) 97 FOR_EACH_EDGE (e, ei, bb->succs) 98 { 99 if (bb_in_loop_p (loop, e->dest)) 100 { 101 if (!(e->flags & EDGE_FALLTHRU)) 102 loop->jumps_within = true; 103 } 104 else 105 { 106 loop->jumps_outof = true; 107 if (!loop->bad) 108 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), 109 REGNO (loop->iter_reg))); 110 } 111 } 112 113 for (insn = BB_HEAD (bb); 114 insn != NEXT_INSN (BB_END (bb)); 115 insn = NEXT_INSN (insn)) 116 { 117 df_ref def; 118 HARD_REG_SET set_this_insn; 119 120 if (!NONDEBUG_INSN_P (insn)) 121 continue; 122 123 if (recog_memoized (insn) < 0 124 && (GET_CODE (PATTERN (insn)) == ASM_INPUT 125 || asm_noperands (PATTERN (insn)) >= 0)) 126 loop->has_asm = true; 127 128 CLEAR_HARD_REG_SET (set_this_insn); 129 FOR_EACH_INSN_DEF (def, insn) 130 { 131 rtx dreg = DF_REF_REG (def); 132 133 if (!REG_P (dreg)) 134 continue; 135 136 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), 137 REGNO (dreg)); 138 } 139 140 if (insn == loop->loop_end) 141 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); 142 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) 143 loop->iter_reg_used = true; 144 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); 145 } 146 } 147 } 148 149 /* Compute the incoming_dest and incoming_src members of LOOP by 150 identifying the edges that jump into the loop. If there is more 151 than one block that jumps into the loop, incoming_src will be set 152 to NULL; likewise, if there is more than one block in the loop that 153 is the destination of an incoming edge, incoming_dest will be NULL. 154 155 Return true if either of these two fields is nonnull, false 156 otherwise. */ 157 static bool 158 process_incoming_edges (hwloop_info loop) 159 { 160 edge e; 161 edge_iterator ei; 162 bool first = true; 163 164 FOR_EACH_EDGE (e, ei, loop->incoming) 165 { 166 if (first) 167 { 168 loop->incoming_src = e->src; 169 loop->incoming_dest = e->dest; 170 first = false; 171 } 172 else 173 { 174 if (e->dest != loop->incoming_dest) 175 loop->incoming_dest = NULL; 176 if (e->src != loop->incoming_src) 177 loop->incoming_src = NULL; 178 } 179 } 180 if (loop->incoming_src == NULL && loop->incoming_dest == NULL) 181 return false; 182 183 return true; 184 } 185 186 /* Try to identify a forwarder block that jump into LOOP, and add it to 187 the set of blocks in the loop, updating the vector of incoming blocks as 188 well. This transformation gives a second chance to loops we couldn't 189 otherwise handle by increasing the chance that we'll end up with one 190 incoming_src block. 191 Return true if we made a change, false otherwise. */ 192 static bool 193 add_forwarder_blocks (hwloop_info loop) 194 { 195 edge e; 196 edge_iterator ei; 197 198 FOR_EACH_EDGE (e, ei, loop->incoming) 199 { 200 if (forwarder_block_p (e->src)) 201 { 202 edge e2; 203 edge_iterator ei2; 204 205 if (dump_file) 206 fprintf (dump_file, 207 ";; Adding forwarder block %d to loop %d and retrying\n", 208 e->src->index, loop->loop_no); 209 loop->blocks.safe_push (e->src); 210 bitmap_set_bit (loop->block_bitmap, e->src->index); 211 FOR_EACH_EDGE (e2, ei2, e->src->preds) 212 vec_safe_push (loop->incoming, e2); 213 loop->incoming->unordered_remove (ei.index); 214 return true; 215 } 216 } 217 return false; 218 } 219 220 /* Called from reorg_loops when a potential loop end is found. LOOP is 221 a newly set up structure describing the loop, it is this function's 222 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the 223 loop_end insn and its enclosing basic block. REG is the loop counter 224 register. 225 For our purposes, a loop is defined by the set of blocks reachable from 226 the loop head in which the loop counter register is live. This matches 227 the expected use; targets that call into this code usually replace the 228 loop counter with a different special register. */ 229 static void 230 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg) 231 { 232 bool found_tail; 233 unsigned dwork = 0; 234 basic_block bb; 235 236 loop->tail = tail_bb; 237 loop->loop_end = tail_insn; 238 loop->iter_reg = reg; 239 vec_alloc (loop->incoming, 2); 240 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn)); 241 242 if (EDGE_COUNT (tail_bb->succs) != 2) 243 { 244 loop->bad = true; 245 return; 246 } 247 loop->head = BRANCH_EDGE (tail_bb)->dest; 248 loop->successor = FALLTHRU_EDGE (tail_bb)->dest; 249 250 auto_vec<basic_block, 20> works; 251 works.safe_push (loop->head); 252 253 found_tail = false; 254 for (dwork = 0; works.iterate (dwork, &bb); dwork++) 255 { 256 edge e; 257 edge_iterator ei; 258 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) 259 { 260 /* We've reached the exit block. The loop must be bad. */ 261 if (dump_file) 262 fprintf (dump_file, 263 ";; Loop is bad - reached exit block while scanning\n"); 264 loop->bad = true; 265 break; 266 } 267 268 if (bitmap_bit_p (loop->block_bitmap, bb->index)) 269 continue; 270 271 /* We've not seen this block before. Add it to the loop's 272 list and then add each successor to the work list. */ 273 274 loop->blocks.safe_push (bb); 275 bitmap_set_bit (loop->block_bitmap, bb->index); 276 277 if (bb == tail_bb) 278 found_tail = true; 279 else 280 { 281 FOR_EACH_EDGE (e, ei, bb->succs) 282 { 283 basic_block succ = EDGE_SUCC (bb, ei.index)->dest; 284 if (REGNO_REG_SET_P (df_get_live_in (succ), 285 REGNO (loop->iter_reg))) 286 works.safe_push (succ); 287 } 288 } 289 } 290 291 if (!found_tail) 292 loop->bad = true; 293 294 /* Find the predecessor, and make sure nothing else jumps into this loop. */ 295 if (!loop->bad) 296 { 297 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb) 298 { 299 edge e; 300 edge_iterator ei; 301 FOR_EACH_EDGE (e, ei, bb->preds) 302 { 303 basic_block pred = e->src; 304 305 if (!bb_in_loop_p (loop, pred)) 306 { 307 if (dump_file) 308 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", 309 loop->loop_no, pred->index, 310 e->dest->index); 311 vec_safe_push (loop->incoming, e); 312 } 313 } 314 } 315 316 if (!process_incoming_edges (loop)) 317 { 318 if (dump_file) 319 fprintf (dump_file, 320 ";; retrying loop %d with forwarder blocks\n", 321 loop->loop_no); 322 if (!add_forwarder_blocks (loop)) 323 { 324 if (dump_file) 325 fprintf (dump_file, ";; No forwarder blocks found\n"); 326 loop->bad = true; 327 } 328 else if (!process_incoming_edges (loop)) 329 { 330 if (dump_file) 331 fprintf (dump_file, 332 ";; can't find suitable entry for loop %d\n", 333 loop->loop_no); 334 } 335 } 336 } 337 } 338 339 /* Analyze the structure of the loops in the current function. Use 340 LOOP_STACK for bitmap allocations. Returns all the valid candidates for 341 hardware loops found in this function. HOOKS is the argument 342 passed to reorg_loops, used here to find the iteration registers 343 from a loop_end pattern. */ 344 static hwloop_info 345 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks) 346 { 347 hwloop_info loops = NULL; 348 hwloop_info loop; 349 basic_block bb; 350 int nloops = 0; 351 352 /* Find all the possible loop tails. This means searching for every 353 loop_end instruction. For each one found, create a hwloop_info 354 structure and add the head block to the work list. */ 355 FOR_EACH_BB_FN (bb, cfun) 356 { 357 rtx_insn *tail = BB_END (bb); 358 rtx_insn *insn; 359 rtx reg; 360 361 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb)) 362 tail = PREV_INSN (tail); 363 364 if (tail == NULL_RTX) 365 continue; 366 367 if (!JUMP_P (tail)) 368 continue; 369 reg = hooks->end_pattern_reg (tail); 370 if (reg == NULL_RTX) 371 continue; 372 373 /* A possible loop end */ 374 375 /* There's a degenerate case we can handle - an empty loop consisting 376 of only a back branch. Handle that by deleting the branch. */ 377 insn = JUMP_LABEL_AS_INSN (tail); 378 while (insn && !NONDEBUG_INSN_P (insn)) 379 insn = NEXT_INSN (insn); 380 if (insn == tail) 381 { 382 basic_block succ = FALLTHRU_EDGE (bb)->dest; 383 if (dump_file) 384 { 385 fprintf (dump_file, ";; degenerate loop ending at\n"); 386 print_rtl_single (dump_file, tail); 387 } 388 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) 389 { 390 if (dump_file) 391 fprintf (dump_file, ";; deleting it\n"); 392 delete_insn_and_edges (tail); 393 continue; 394 } 395 } 396 397 loop = XCNEW (struct hwloop_info_d); 398 loop->next = loops; 399 loops = loop; 400 loop->loop_no = nloops++; 401 loop->blocks.create (20); 402 loop->block_bitmap = BITMAP_ALLOC (loop_stack); 403 404 if (dump_file) 405 { 406 fprintf (dump_file, ";; potential loop %d ending at\n", 407 loop->loop_no); 408 print_rtl_single (dump_file, tail); 409 } 410 411 discover_loop (loop, bb, tail, reg); 412 } 413 414 /* Compute loop nestings. Given two loops A and B, either the sets 415 of their blocks don't intersect at all, or one is the subset of 416 the other, or the blocks don't form a good nesting structure. */ 417 for (loop = loops; loop; loop = loop->next) 418 { 419 hwloop_info other; 420 421 if (loop->bad) 422 continue; 423 424 for (other = loops; other; other = other->next) 425 { 426 if (other->bad) 427 continue; 428 429 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) 430 continue; 431 if (!bitmap_intersect_compl_p (other->block_bitmap, 432 loop->block_bitmap)) 433 loop->loops.safe_push (other); 434 else if (!bitmap_intersect_compl_p (loop->block_bitmap, 435 other->block_bitmap)) 436 other->loops.safe_push (loop); 437 else 438 { 439 if (dump_file) 440 fprintf (dump_file, 441 ";; can't find suitable nesting for loops %d and %d\n", 442 loop->loop_no, other->loop_no); 443 loop->bad = other->bad = true; 444 } 445 } 446 } 447 448 if (dump_file) 449 dump_hwloops (loops); 450 451 return loops; 452 } 453 454 /* Free up the loop structures in LOOPS. */ 455 static void 456 free_loops (hwloop_info loops) 457 { 458 while (loops) 459 { 460 hwloop_info loop = loops; 461 loops = loop->next; 462 loop->loops.release (); 463 loop->blocks.release (); 464 BITMAP_FREE (loop->block_bitmap); 465 XDELETE (loop); 466 } 467 } 468 469 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) 470 471 /* Initialize the aux fields to give ascending indices to basic blocks. */ 472 static void 473 set_bb_indices (void) 474 { 475 basic_block bb; 476 intptr_t index; 477 478 index = 0; 479 FOR_EACH_BB_FN (bb, cfun) 480 bb->aux = (void *) index++; 481 } 482 483 /* The taken-branch edge from the loop end can actually go forward. 484 If the target's hardware loop support requires that the loop end be 485 after the loop start, try to reorder a loop's basic blocks when we 486 find such a case. 487 This is not very aggressive; it only moves at most one block. It 488 does not introduce new branches into loops; it may remove them, or 489 it may switch fallthru/jump edges. */ 490 static void 491 reorder_loops (hwloop_info loops) 492 { 493 basic_block bb; 494 hwloop_info loop; 495 496 cfg_layout_initialize (0); 497 498 set_bb_indices (); 499 500 for (loop = loops; loop; loop = loop->next) 501 { 502 edge e; 503 edge_iterator ei; 504 505 if (loop->bad) 506 continue; 507 508 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) 509 continue; 510 511 FOR_EACH_EDGE (e, ei, loop->head->succs) 512 { 513 if (bitmap_bit_p (loop->block_bitmap, e->dest->index) 514 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) 515 { 516 basic_block start_bb = e->dest; 517 basic_block start_prev_bb = start_bb->prev_bb; 518 519 if (dump_file) 520 fprintf (dump_file, ";; Moving block %d before block %d\n", 521 loop->head->index, start_bb->index); 522 loop->head->prev_bb->next_bb = loop->head->next_bb; 523 loop->head->next_bb->prev_bb = loop->head->prev_bb; 524 525 loop->head->prev_bb = start_prev_bb; 526 loop->head->next_bb = start_bb; 527 start_prev_bb->next_bb = start_bb->prev_bb = loop->head; 528 529 set_bb_indices (); 530 break; 531 } 532 } 533 loops = loops->next; 534 } 535 536 FOR_EACH_BB_FN (bb, cfun) 537 { 538 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) 539 bb->aux = bb->next_bb; 540 else 541 bb->aux = NULL; 542 } 543 cfg_layout_finalize (); 544 clear_aux_for_blocks (); 545 df_analyze (); 546 } 547 548 /* Call the OPT function for LOOP and all of its sub-loops. This is 549 done in a depth-first search; innermost loops are visited first. 550 OPTIMIZE and FAIL are the functions passed to reorg_loops by the 551 target's reorg pass. */ 552 static void 553 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) 554 { 555 int ix; 556 hwloop_info inner; 557 int inner_depth = 0; 558 559 if (loop->visited) 560 return; 561 562 loop->visited = 1; 563 564 if (loop->bad) 565 { 566 if (dump_file) 567 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); 568 goto bad_loop; 569 } 570 571 /* Every loop contains in its list of inner loops every loop nested inside 572 it, even if there are intermediate loops. This works because we're doing 573 a depth-first search here and never visit a loop more than once. 574 Recursion depth is effectively limited by the number of available 575 hardware registers. */ 576 for (ix = 0; loop->loops.iterate (ix, &inner); ix++) 577 { 578 optimize_loop (inner, hooks); 579 580 if (!inner->bad && inner_depth < inner->depth) 581 inner_depth = inner->depth; 582 /* The set of registers may be changed while optimizing the inner 583 loop. */ 584 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); 585 } 586 587 loop->depth = inner_depth + 1; 588 589 if (hooks->opt (loop)) 590 return; 591 592 bad_loop: 593 if (dump_file) 594 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); 595 596 loop->bad = true; 597 hooks->fail (loop); 598 } 599 600 /* This function can be used from a port's machine_dependent_reorg to 601 find and analyze loops that end in loop_end instructions. It uses 602 a set of function pointers in HOOKS to call back into the 603 target-specific functions to perform the actual machine-specific 604 transformations. 605 606 Such transformations typically involve additional set-up 607 instructions before the loop, to define loop bounds or set up a 608 special loop counter register. 609 610 DO_REORDER should be set to true if we should try to use the 611 reorder_loops function to ensure the loop end occurs after the loop 612 start. This is for use by targets where the loop hardware requires 613 this condition. 614 615 HOOKS is used to pass in target specific hooks; see 616 hw-doloop.h. */ 617 void 618 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) 619 { 620 hwloop_info loops = NULL; 621 hwloop_info loop; 622 bitmap_obstack loop_stack; 623 624 df_live_add_problem (); 625 df_live_set_all_dirty (); 626 df_analyze (); 627 628 bitmap_obstack_initialize (&loop_stack); 629 630 if (dump_file) 631 fprintf (dump_file, ";; Find loops, first pass\n\n"); 632 633 loops = discover_loops (&loop_stack, hooks); 634 635 /* We can't enter cfglayout mode anymore if basic block partitioning 636 already happened. */ 637 if (do_reorder && !crtl->has_bb_partition) 638 { 639 reorder_loops (loops); 640 free_loops (loops); 641 642 if (dump_file) 643 fprintf (dump_file, ";; Find loops, second pass\n\n"); 644 645 loops = discover_loops (&loop_stack, hooks); 646 } 647 648 for (loop = loops; loop; loop = loop->next) 649 scan_loop (loop); 650 651 /* Now apply the optimizations. */ 652 for (loop = loops; loop; loop = loop->next) 653 optimize_loop (loop, hooks); 654 655 if (dump_file) 656 { 657 fprintf (dump_file, ";; After hardware loops optimization:\n\n"); 658 dump_hwloops (loops); 659 } 660 661 free_loops (loops); 662 bitmap_obstack_release (&loop_stack); 663 664 if (dump_file) 665 print_rtl (dump_file, get_insns ()); 666 } 667