/* Try to unroll loops, and split induction variables.
   Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
   Free Software Foundation, Inc.
   Contributed by James E. Wilson, Cygnus Support/UC Berkeley.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* Try to unroll a loop, and split induction variables.

   Loops for which the number of iterations can be calculated exactly are
   handled specially.  If the number of iterations times the insn_count is
   less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
   Otherwise, we try to unroll the loop a number of times modulo the number
   of iterations, so that only one exit test will be needed.  It is unrolled
   a number of times approximately equal to MAX_UNROLLED_INSNS divided by
   the insn count.

   Otherwise, if the number of iterations can be calculated exactly at
   run time, and the loop is always entered at the top, then we try to
   precondition the loop.  That is, at run time, calculate how many times
   the loop will execute, and then execute the loop body a few times so
   that the remaining iterations will be some multiple of 4 (or 2 if the
   loop is large).  Then fall through to a loop unrolled 4 (or 2) times,
   with only one exit test needed at the end of the loop.

   Otherwise, if the number of iterations can not be calculated exactly,
   not even at run time, then we still unroll the loop a number of times
   approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
   but there must be an exit test after each copy of the loop body.

   For each induction variable, which is dead outside the loop (replaceable)
   or for which we can easily calculate the final value, if we can easily
   calculate its value at each place where it is set as a function of the
   current loop unroll count and the variable's value at loop entry, then
   the induction variable is split into `N' different variables, one for
   each copy of the loop body.  One variable is live across the backward
   branch, and the others are all calculated as a function of this variable.
   This helps eliminate data dependencies, and leads to further opportunities
   for cse.  */

/* Possible improvements follow:  */

/* ??? Add an extra pass somewhere to determine whether unrolling will
   give any benefit.  E.g. after generating all unrolled insns, compute the
   cost of all insns and compare against cost of insns in rolled loop.

   - On traditional architectures, unrolling a non-constant bound loop
     is a win if there is a giv whose only use is in memory addresses, the
     memory addresses can be split, and hence giv increments can be
     eliminated.
   - It is also a win if the loop is executed many times, and preconditioning
     can be performed for the loop.
   Add code to check for these and similar cases.  */

/* ??? Improve control of which loops get unrolled.  Could use profiling
   info to only unroll the most commonly executed loops.  Perhaps have
   a user specifiable option to control the amount of code expansion,
   or the percent of loops to consider for unrolling.  Etc.  */

/* ??? Look at the register copies inside the loop to see if they form a
   simple permutation.
If so, iterate the permutation until it gets back to
   the start state.  This is how many times we should unroll the loop, for
   best results, because then all register copies can be eliminated.
   For example, the lisp nreverse function should be unrolled 3 times
   while (this)
     {
       next = this->cdr;
       this->cdr = prev;
       prev = this;
       this = next;
     }

   ??? The number of times to unroll the loop may also be based on data
   references in the loop.  For example, if we have a loop that references
   x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times.  */

/* ??? Add some simple linear equation solving capability so that we can
   determine the number of loop iterations for more complex loops.
   For example, consider this loop from gdb
   #define SWAP_TARGET_AND_HOST(buffer,len)
   {
     char tmp;
     char *p = (char *) buffer;
     char *q = ((char *) buffer) + len - 1;
     int iterations = (len + 1) >> 1;
     int i;
     for (p; p < q; p++, q--)
       {
         tmp = *q;
         *q = *p;
         *p = tmp;
       }
   }
   Note that:
   start value = p = &buffer + current_iteration
   end value   = q = &buffer + len - 1 - current_iteration
   Given the loop exit test of "p < q", then there must be "q - p" iterations,
   set equal to zero and solve for number of iterations:
   q - p = len - 1 - 2*current_iteration = 0
   current_iteration = (len - 1) / 2
   Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
   iterations of this loop.  */

/* ??? Currently, no labels are marked as loop invariant when doing loop
   unrolling.  This is because an insn inside the loop, that loads the address
   of a label inside the loop into a register, could be moved outside the loop
   by the invariant code motion pass if labels were invariant.  If the loop
   is subsequently unrolled, the code will be wrong because each unrolled
   body of the loop will use the same address, whereas each actually needs a
   different address.
A case where this happens is when a loop containing 126 a switch statement is unrolled. 127 128 It would be better to let labels be considered invariant. When we 129 unroll loops here, check to see if any insns using a label local to the 130 loop were moved before the loop. If so, then correct the problem, by 131 moving the insn back into the loop, or perhaps replicate the insn before 132 the loop, one copy for each time the loop is unrolled. */ 133 134 #include "config.h" 135 #include "system.h" 136 #include "rtl.h" 137 #include "tm_p.h" 138 #include "insn-config.h" 139 #include "integrate.h" 140 #include "regs.h" 141 #include "recog.h" 142 #include "flags.h" 143 #include "function.h" 144 #include "expr.h" 145 #include "loop.h" 146 #include "toplev.h" 147 #include "hard-reg-set.h" 148 #include "basic-block.h" 149 #include "predict.h" 150 #include "params.h" 151 152 /* The prime factors looked for when trying to unroll a loop by some 153 number which is modulo the total number of iterations. Just checking 154 for these 4 prime factors will find at least one factor for 75% of 155 all numbers theoretically. Practically speaking, this will succeed 156 almost all of the time since loops are generally a multiple of 2 157 and/or 5. */ 158 159 #define NUM_FACTORS 4 160 161 static struct _factor { const int factor; int count; } 162 factors[NUM_FACTORS] = { {2, 0}, {3, 0}, {5, 0}, {7, 0}}; 163 164 /* Describes the different types of loop unrolling performed. */ 165 166 enum unroll_types 167 { 168 UNROLL_COMPLETELY, 169 UNROLL_MODULO, 170 UNROLL_NAIVE 171 }; 172 173 /* Indexed by register number, if nonzero, then it contains a pointer 174 to a struct induction for a DEST_REG giv which has been combined with 175 one of more address givs. This is needed because whenever such a DEST_REG 176 giv is modified, we must modify the value of all split address givs 177 that were combined with this DEST_REG giv. 
*/ 178 179 static struct induction **addr_combined_regs; 180 181 /* Indexed by register number, if this is a splittable induction variable, 182 then this will hold the current value of the register, which depends on the 183 iteration number. */ 184 185 static rtx *splittable_regs; 186 187 /* Indexed by register number, if this is a splittable induction variable, 188 then this will hold the number of instructions in the loop that modify 189 the induction variable. Used to ensure that only the last insn modifying 190 a split iv will update the original iv of the dest. */ 191 192 static int *splittable_regs_updates; 193 194 /* Forward declarations. */ 195 196 static rtx simplify_cmp_and_jump_insns PARAMS ((enum rtx_code, 197 enum machine_mode, 198 rtx, rtx, rtx)); 199 static void init_reg_map PARAMS ((struct inline_remap *, int)); 200 static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int)); 201 static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *)); 202 static void final_reg_note_copy PARAMS ((rtx *, struct inline_remap *)); 203 static void copy_loop_body PARAMS ((struct loop *, rtx, rtx, 204 struct inline_remap *, rtx, int, 205 enum unroll_types, rtx, rtx, rtx, rtx)); 206 static int find_splittable_regs PARAMS ((const struct loop *, 207 enum unroll_types, int)); 208 static int find_splittable_givs PARAMS ((const struct loop *, 209 struct iv_class *, enum unroll_types, 210 rtx, int)); 211 static int reg_dead_after_loop PARAMS ((const struct loop *, rtx)); 212 static rtx fold_rtx_mult_add PARAMS ((rtx, rtx, rtx, enum machine_mode)); 213 static rtx remap_split_bivs PARAMS ((struct loop *, rtx)); 214 static rtx find_common_reg_term PARAMS ((rtx, rtx)); 215 static rtx subtract_reg_term PARAMS ((rtx, rtx)); 216 static rtx loop_find_equiv_value PARAMS ((const struct loop *, rtx)); 217 static rtx ujump_to_loop_cont PARAMS ((rtx, rtx)); 218 219 /* Try to unroll one loop and split induction variables in the loop. 
220 221 The loop is described by the arguments LOOP and INSN_COUNT. 222 STRENGTH_REDUCTION_P indicates whether information generated in the 223 strength reduction pass is available. 224 225 This function is intended to be called from within `strength_reduce' 226 in loop.c. */ 227 228 void 229 unroll_loop (loop, insn_count, strength_reduce_p) 230 struct loop *loop; 231 int insn_count; 232 int strength_reduce_p; 233 { 234 struct loop_info *loop_info = LOOP_INFO (loop); 235 struct loop_ivs *ivs = LOOP_IVS (loop); 236 int i, j; 237 unsigned int r; 238 unsigned HOST_WIDE_INT temp; 239 int unroll_number = 1; 240 rtx copy_start, copy_end; 241 rtx insn, sequence, pattern, tem; 242 int max_labelno, max_insnno; 243 rtx insert_before; 244 struct inline_remap *map; 245 char *local_label = NULL; 246 char *local_regno; 247 unsigned int max_local_regnum; 248 unsigned int maxregnum; 249 rtx exit_label = 0; 250 rtx start_label; 251 struct iv_class *bl; 252 int splitting_not_safe = 0; 253 enum unroll_types unroll_type = UNROLL_NAIVE; 254 int loop_preconditioned = 0; 255 rtx safety_label; 256 /* This points to the last real insn in the loop, which should be either 257 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional 258 jumps). */ 259 rtx last_loop_insn; 260 rtx loop_start = loop->start; 261 rtx loop_end = loop->end; 262 263 /* Don't bother unrolling huge loops. Since the minimum factor is 264 two, loops greater than one half of MAX_UNROLLED_INSNS will never 265 be unrolled. */ 266 if (insn_count > MAX_UNROLLED_INSNS / 2) 267 { 268 if (loop_dump_stream) 269 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n"); 270 return; 271 } 272 273 /* Determine type of unroll to perform. Depends on the number of iterations 274 and the size of the loop. */ 275 276 /* If there is no strength reduce info, then set 277 loop_info->n_iterations to zero. This can happen if 278 strength_reduce can't find any bivs in the loop. 
A value of zero 279 indicates that the number of iterations could not be calculated. */ 280 281 if (! strength_reduce_p) 282 loop_info->n_iterations = 0; 283 284 if (loop_dump_stream && loop_info->n_iterations > 0) 285 { 286 fputs ("Loop unrolling: ", loop_dump_stream); 287 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, 288 loop_info->n_iterations); 289 fputs (" iterations.\n", loop_dump_stream); 290 } 291 292 /* Find and save a pointer to the last nonnote insn in the loop. */ 293 294 last_loop_insn = prev_nonnote_insn (loop_end); 295 296 /* Calculate how many times to unroll the loop. Indicate whether or 297 not the loop is being completely unrolled. */ 298 299 if (loop_info->n_iterations == 1) 300 { 301 /* Handle the case where the loop begins with an unconditional 302 jump to the loop condition. Make sure to delete the jump 303 insn, otherwise the loop body will never execute. */ 304 305 /* FIXME this actually checks for a jump to the continue point, which 306 is not the same as the condition in a for loop. As a result, this 307 optimization fails for most for loops. We should really use flow 308 information rather than instruction pattern matching. */ 309 rtx ujump = ujump_to_loop_cont (loop->start, loop->cont); 310 311 /* If number of iterations is exactly 1, then eliminate the compare and 312 branch at the end of the loop since they will never be taken. 313 Then return, since no other action is needed here. */ 314 315 /* If the last instruction is not a BARRIER or a JUMP_INSN, then 316 don't do anything. */ 317 318 if (GET_CODE (last_loop_insn) == BARRIER) 319 { 320 /* Delete the jump insn. This will delete the barrier also. 
*/ 321 last_loop_insn = PREV_INSN (last_loop_insn); 322 } 323 324 if (ujump && GET_CODE (last_loop_insn) == JUMP_INSN) 325 { 326 #ifdef HAVE_cc0 327 rtx prev = PREV_INSN (last_loop_insn); 328 #endif 329 delete_related_insns (last_loop_insn); 330 #ifdef HAVE_cc0 331 /* The immediately preceding insn may be a compare which must be 332 deleted. */ 333 if (only_sets_cc0_p (prev)) 334 delete_related_insns (prev); 335 #endif 336 337 delete_related_insns (ujump); 338 339 /* Remove the loop notes since this is no longer a loop. */ 340 if (loop->vtop) 341 delete_related_insns (loop->vtop); 342 if (loop->cont) 343 delete_related_insns (loop->cont); 344 if (loop_start) 345 delete_related_insns (loop_start); 346 if (loop_end) 347 delete_related_insns (loop_end); 348 349 return; 350 } 351 } 352 353 if (loop_info->n_iterations > 0 354 /* Avoid overflow in the next expression. */ 355 && loop_info->n_iterations < (unsigned) MAX_UNROLLED_INSNS 356 && loop_info->n_iterations * insn_count < (unsigned) MAX_UNROLLED_INSNS) 357 { 358 unroll_number = loop_info->n_iterations; 359 unroll_type = UNROLL_COMPLETELY; 360 } 361 else if (loop_info->n_iterations > 0) 362 { 363 /* Try to factor the number of iterations. Don't bother with the 364 general case, only using 2, 3, 5, and 7 will get 75% of all 365 numbers theoretically, and almost all in practice. */ 366 367 for (i = 0; i < NUM_FACTORS; i++) 368 factors[i].count = 0; 369 370 temp = loop_info->n_iterations; 371 for (i = NUM_FACTORS - 1; i >= 0; i--) 372 while (temp % factors[i].factor == 0) 373 { 374 factors[i].count++; 375 temp = temp / factors[i].factor; 376 } 377 378 /* Start with the larger factors first so that we generally 379 get lots of unrolling. 
*/ 380 381 unroll_number = 1; 382 temp = insn_count; 383 for (i = 3; i >= 0; i--) 384 while (factors[i].count--) 385 { 386 if (temp * factors[i].factor < (unsigned) MAX_UNROLLED_INSNS) 387 { 388 unroll_number *= factors[i].factor; 389 temp *= factors[i].factor; 390 } 391 else 392 break; 393 } 394 395 /* If we couldn't find any factors, then unroll as in the normal 396 case. */ 397 if (unroll_number == 1) 398 { 399 if (loop_dump_stream) 400 fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n"); 401 } 402 else 403 unroll_type = UNROLL_MODULO; 404 } 405 406 /* Default case, calculate number of times to unroll loop based on its 407 size. */ 408 if (unroll_type == UNROLL_NAIVE) 409 { 410 if (8 * insn_count < MAX_UNROLLED_INSNS) 411 unroll_number = 8; 412 else if (4 * insn_count < MAX_UNROLLED_INSNS) 413 unroll_number = 4; 414 else 415 unroll_number = 2; 416 } 417 418 /* Now we know how many times to unroll the loop. */ 419 420 if (loop_dump_stream) 421 fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number); 422 423 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO) 424 { 425 /* Loops of these types can start with jump down to the exit condition 426 in rare circumstances. 427 428 Consider a pair of nested loops where the inner loop is part 429 of the exit code for the outer loop. 430 431 In this case jump.c will not duplicate the exit test for the outer 432 loop, so it will start with a jump to the exit code. 433 434 Then consider if the inner loop turns out to iterate once and 435 only once. We will end up deleting the jumps associated with 436 the inner loop. However, the loop notes are not removed from 437 the instruction stream. 438 439 And finally assume that we can compute the number of iterations 440 for the outer loop. 441 442 In this case unroll may want to unroll the outer loop even though 443 it starts with a jump to the outer loop's exit code. 
444 445 We could try to optimize this case, but it hardly seems worth it. 446 Just return without unrolling the loop in such cases. */ 447 448 insn = loop_start; 449 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) 450 insn = NEXT_INSN (insn); 451 if (GET_CODE (insn) == JUMP_INSN) 452 return; 453 } 454 455 if (unroll_type == UNROLL_COMPLETELY) 456 { 457 /* Completely unrolling the loop: Delete the compare and branch at 458 the end (the last two instructions). This delete must done at the 459 very end of loop unrolling, to avoid problems with calls to 460 back_branch_in_range_p, which is called by find_splittable_regs. 461 All increments of splittable bivs/givs are changed to load constant 462 instructions. */ 463 464 copy_start = loop_start; 465 466 /* Set insert_before to the instruction immediately after the JUMP_INSN 467 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of 468 the loop will be correctly handled by copy_loop_body. */ 469 insert_before = NEXT_INSN (last_loop_insn); 470 471 /* Set copy_end to the insn before the jump at the end of the loop. */ 472 if (GET_CODE (last_loop_insn) == BARRIER) 473 copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); 474 else if (GET_CODE (last_loop_insn) == JUMP_INSN) 475 { 476 copy_end = PREV_INSN (last_loop_insn); 477 #ifdef HAVE_cc0 478 /* The instruction immediately before the JUMP_INSN may be a compare 479 instruction which we do not want to copy. */ 480 if (sets_cc0_p (PREV_INSN (copy_end))) 481 copy_end = PREV_INSN (copy_end); 482 #endif 483 } 484 else 485 { 486 /* We currently can't unroll a loop if it doesn't end with a 487 JUMP_INSN. There would need to be a mechanism that recognizes 488 this case, and then inserts a jump after each loop body, which 489 jumps to after the last loop body. 
*/ 490 if (loop_dump_stream) 491 fprintf (loop_dump_stream, 492 "Unrolling failure: loop does not end with a JUMP_INSN.\n"); 493 return; 494 } 495 } 496 else if (unroll_type == UNROLL_MODULO) 497 { 498 /* Partially unrolling the loop: The compare and branch at the end 499 (the last two instructions) must remain. Don't copy the compare 500 and branch instructions at the end of the loop. Insert the unrolled 501 code immediately before the compare/branch at the end so that the 502 code will fall through to them as before. */ 503 504 copy_start = loop_start; 505 506 /* Set insert_before to the jump insn at the end of the loop. 507 Set copy_end to before the jump insn at the end of the loop. */ 508 if (GET_CODE (last_loop_insn) == BARRIER) 509 { 510 insert_before = PREV_INSN (last_loop_insn); 511 copy_end = PREV_INSN (insert_before); 512 } 513 else if (GET_CODE (last_loop_insn) == JUMP_INSN) 514 { 515 insert_before = last_loop_insn; 516 #ifdef HAVE_cc0 517 /* The instruction immediately before the JUMP_INSN may be a compare 518 instruction which we do not want to copy or delete. */ 519 if (sets_cc0_p (PREV_INSN (insert_before))) 520 insert_before = PREV_INSN (insert_before); 521 #endif 522 copy_end = PREV_INSN (insert_before); 523 } 524 else 525 { 526 /* We currently can't unroll a loop if it doesn't end with a 527 JUMP_INSN. There would need to be a mechanism that recognizes 528 this case, and then inserts a jump after each loop body, which 529 jumps to after the last loop body. */ 530 if (loop_dump_stream) 531 fprintf (loop_dump_stream, 532 "Unrolling failure: loop does not end with a JUMP_INSN.\n"); 533 return; 534 } 535 } 536 else 537 { 538 /* Normal case: Must copy the compare and branch instructions at the 539 end of the loop. */ 540 541 if (GET_CODE (last_loop_insn) == BARRIER) 542 { 543 /* Loop ends with an unconditional jump and a barrier. 544 Handle this like above, don't copy jump and barrier. 
545 This is not strictly necessary, but doing so prevents generating 546 unconditional jumps to an immediately following label. 547 548 This will be corrected below if the target of this jump is 549 not the start_label. */ 550 551 insert_before = PREV_INSN (last_loop_insn); 552 copy_end = PREV_INSN (insert_before); 553 } 554 else if (GET_CODE (last_loop_insn) == JUMP_INSN) 555 { 556 /* Set insert_before to immediately after the JUMP_INSN, so that 557 NOTEs at the end of the loop will be correctly handled by 558 copy_loop_body. */ 559 insert_before = NEXT_INSN (last_loop_insn); 560 copy_end = last_loop_insn; 561 } 562 else 563 { 564 /* We currently can't unroll a loop if it doesn't end with a 565 JUMP_INSN. There would need to be a mechanism that recognizes 566 this case, and then inserts a jump after each loop body, which 567 jumps to after the last loop body. */ 568 if (loop_dump_stream) 569 fprintf (loop_dump_stream, 570 "Unrolling failure: loop does not end with a JUMP_INSN.\n"); 571 return; 572 } 573 574 /* If copying exit test branches because they can not be eliminated, 575 then must convert the fall through case of the branch to a jump past 576 the end of the loop. Create a label to emit after the loop and save 577 it for later use. Do not use the label after the loop, if any, since 578 it might be used by insns outside the loop, or there might be insns 579 added before it later by final_[bg]iv_value which must be after 580 the real exit label. */ 581 exit_label = gen_label_rtx (); 582 583 insn = loop_start; 584 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) 585 insn = NEXT_INSN (insn); 586 587 if (GET_CODE (insn) == JUMP_INSN) 588 { 589 /* The loop starts with a jump down to the exit condition test. 590 Start copying the loop after the barrier following this 591 jump insn. 
*/ 592 copy_start = NEXT_INSN (insn); 593 594 /* Splitting induction variables doesn't work when the loop is 595 entered via a jump to the bottom, because then we end up doing 596 a comparison against a new register for a split variable, but 597 we did not execute the set insn for the new register because 598 it was skipped over. */ 599 splitting_not_safe = 1; 600 if (loop_dump_stream) 601 fprintf (loop_dump_stream, 602 "Splitting not safe, because loop not entered at top.\n"); 603 } 604 else 605 copy_start = loop_start; 606 } 607 608 /* This should always be the first label in the loop. */ 609 start_label = NEXT_INSN (copy_start); 610 /* There may be a line number note and/or a loop continue note here. */ 611 while (GET_CODE (start_label) == NOTE) 612 start_label = NEXT_INSN (start_label); 613 if (GET_CODE (start_label) != CODE_LABEL) 614 { 615 /* This can happen as a result of jump threading. If the first insns in 616 the loop test the same condition as the loop's backward jump, or the 617 opposite condition, then the backward jump will be modified to point 618 to elsewhere, and the loop's start label is deleted. 619 620 This case currently can not be handled by the loop unrolling code. */ 621 622 if (loop_dump_stream) 623 fprintf (loop_dump_stream, 624 "Unrolling failure: unknown insns between BEG note and loop label.\n"); 625 return; 626 } 627 if (LABEL_NAME (start_label)) 628 { 629 /* The jump optimization pass must have combined the original start label 630 with a named label for a goto. We can't unroll this case because 631 jumps which go to the named label must be handled differently than 632 jumps to the loop start, and it is impossible to differentiate them 633 in this case. 
*/ 634 if (loop_dump_stream) 635 fprintf (loop_dump_stream, 636 "Unrolling failure: loop start label is gone\n"); 637 return; 638 } 639 640 if (unroll_type == UNROLL_NAIVE 641 && GET_CODE (last_loop_insn) == BARRIER 642 && GET_CODE (PREV_INSN (last_loop_insn)) == JUMP_INSN 643 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn))) 644 { 645 /* In this case, we must copy the jump and barrier, because they will 646 not be converted to jumps to an immediately following label. */ 647 648 insert_before = NEXT_INSN (last_loop_insn); 649 copy_end = last_loop_insn; 650 } 651 652 if (unroll_type == UNROLL_NAIVE 653 && GET_CODE (last_loop_insn) == JUMP_INSN 654 && start_label != JUMP_LABEL (last_loop_insn)) 655 { 656 /* ??? The loop ends with a conditional branch that does not branch back 657 to the loop start label. In this case, we must emit an unconditional 658 branch to the loop exit after emitting the final branch. 659 copy_loop_body does not have support for this currently, so we 660 give up. It doesn't seem worthwhile to unroll anyways since 661 unrolling would increase the number of branch instructions 662 executed. */ 663 if (loop_dump_stream) 664 fprintf (loop_dump_stream, 665 "Unrolling failure: final conditional branch not to loop start\n"); 666 return; 667 } 668 669 /* Allocate a translation table for the labels and insn numbers. 670 They will be filled in as we copy the insns in the loop. */ 671 672 max_labelno = max_label_num (); 673 max_insnno = get_max_uid (); 674 675 /* Various paths through the unroll code may reach the "egress" label 676 without initializing fields within the map structure. 677 678 To be safe, we use xcalloc to zero the memory. */ 679 map = (struct inline_remap *) xcalloc (1, sizeof (struct inline_remap)); 680 681 /* Allocate the label map. 
*/ 682 683 if (max_labelno > 0) 684 { 685 map->label_map = (rtx *) xcalloc (max_labelno, sizeof (rtx)); 686 local_label = (char *) xcalloc (max_labelno, sizeof (char)); 687 } 688 689 /* Search the loop and mark all local labels, i.e. the ones which have to 690 be distinct labels when copied. For all labels which might be 691 non-local, set their label_map entries to point to themselves. 692 If they happen to be local their label_map entries will be overwritten 693 before the loop body is copied. The label_map entries for local labels 694 will be set to a different value each time the loop body is copied. */ 695 696 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn)) 697 { 698 rtx note; 699 700 if (GET_CODE (insn) == CODE_LABEL) 701 local_label[CODE_LABEL_NUMBER (insn)] = 1; 702 else if (GET_CODE (insn) == JUMP_INSN) 703 { 704 if (JUMP_LABEL (insn)) 705 set_label_in_map (map, 706 CODE_LABEL_NUMBER (JUMP_LABEL (insn)), 707 JUMP_LABEL (insn)); 708 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC 709 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 710 { 711 rtx pat = PATTERN (insn); 712 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC; 713 int len = XVECLEN (pat, diff_vec_p); 714 rtx label; 715 716 for (i = 0; i < len; i++) 717 { 718 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0); 719 set_label_in_map (map, CODE_LABEL_NUMBER (label), label); 720 } 721 } 722 } 723 if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX))) 724 set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)), 725 XEXP (note, 0)); 726 } 727 728 /* Allocate space for the insn map. */ 729 730 map->insn_map = (rtx *) xmalloc (max_insnno * sizeof (rtx)); 731 732 /* Set this to zero, to indicate that we are doing loop unrolling, 733 not function inlining. */ 734 map->inline_target = 0; 735 736 /* The register and constant maps depend on the number of registers 737 present, so the final maps can't be created until after 738 find_splittable_regs is called. 
However, they are needed for 739 preconditioning, so we create temporary maps when preconditioning 740 is performed. */ 741 742 /* The preconditioning code may allocate two new pseudo registers. */ 743 maxregnum = max_reg_num (); 744 745 /* local_regno is only valid for regnos < max_local_regnum. */ 746 max_local_regnum = maxregnum; 747 748 /* Allocate and zero out the splittable_regs and addr_combined_regs 749 arrays. These must be zeroed here because they will be used if 750 loop preconditioning is performed, and must be zero for that case. 751 752 It is safe to do this here, since the extra registers created by the 753 preconditioning code and find_splittable_regs will never be used 754 to access the splittable_regs[] and addr_combined_regs[] arrays. */ 755 756 splittable_regs = (rtx *) xcalloc (maxregnum, sizeof (rtx)); 757 splittable_regs_updates = (int *) xcalloc (maxregnum, sizeof (int)); 758 addr_combined_regs 759 = (struct induction **) xcalloc (maxregnum, sizeof (struct induction *)); 760 local_regno = (char *) xcalloc (maxregnum, sizeof (char)); 761 762 /* Mark all local registers, i.e. the ones which are referenced only 763 inside the loop. */ 764 if (INSN_UID (copy_end) < max_uid_for_loop) 765 { 766 int copy_start_luid = INSN_LUID (copy_start); 767 int copy_end_luid = INSN_LUID (copy_end); 768 769 /* If a register is used in the jump insn, we must not duplicate it 770 since it will also be used outside the loop. */ 771 if (GET_CODE (copy_end) == JUMP_INSN) 772 copy_end_luid--; 773 774 /* If we have a target that uses cc0, then we also must not duplicate 775 the insn that sets cc0 before the jump insn, if one is present. 
*/ 776 #ifdef HAVE_cc0 777 if (GET_CODE (copy_end) == JUMP_INSN 778 && sets_cc0_p (PREV_INSN (copy_end))) 779 copy_end_luid--; 780 #endif 781 782 /* If copy_start points to the NOTE that starts the loop, then we must 783 use the next luid, because invariant pseudo-regs moved out of the loop 784 have their lifetimes modified to start here, but they are not safe 785 to duplicate. */ 786 if (copy_start == loop_start) 787 copy_start_luid++; 788 789 /* If a pseudo's lifetime is entirely contained within this loop, then we 790 can use a different pseudo in each unrolled copy of the loop. This 791 results in better code. */ 792 /* We must limit the generic test to max_reg_before_loop, because only 793 these pseudo registers have valid regno_first_uid info. */ 794 for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r) 795 if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) < max_uid_for_loop 796 && REGNO_FIRST_LUID (r) >= copy_start_luid 797 && REGNO_LAST_NOTE_UID (r) > 0 && REGNO_LAST_NOTE_UID (r) < max_uid_for_loop 798 && REGNO_LAST_NOTE_LUID (r) <= copy_end_luid) 799 { 800 /* However, we must also check for loop-carried dependencies. 801 If the value the pseudo has at the end of iteration X is 802 used by iteration X+1, then we can not use a different pseudo 803 for each unrolled copy of the loop. */ 804 /* A pseudo is safe if regno_first_uid is a set, and this 805 set dominates all instructions from regno_first_uid to 806 regno_last_uid. */ 807 /* ??? This check is simplistic. We would get better code if 808 this check was more sophisticated. 
*/ 809 if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r), 810 copy_start, copy_end)) 811 local_regno[r] = 1; 812 813 if (loop_dump_stream) 814 { 815 if (local_regno[r]) 816 fprintf (loop_dump_stream, "Marked reg %d as local\n", r); 817 else 818 fprintf (loop_dump_stream, "Did not mark reg %d as local\n", 819 r); 820 } 821 } 822 } 823 824 /* If this loop requires exit tests when unrolled, check to see if we 825 can precondition the loop so as to make the exit tests unnecessary. 826 Just like variable splitting, this is not safe if the loop is entered 827 via a jump to the bottom. Also, can not do this if no strength 828 reduce info, because precondition_loop_p uses this info. */ 829 830 /* Must copy the loop body for preconditioning before the following 831 find_splittable_regs call since that will emit insns which need to 832 be after the preconditioned loop copies, but immediately before the 833 unrolled loop copies. */ 834 835 /* Also, it is not safe to split induction variables for the preconditioned 836 copies of the loop body. If we split induction variables, then the code 837 assumes that each induction variable can be represented as a function 838 of its initial value and the loop iteration number. This is not true 839 in this case, because the last preconditioned copy of the loop body 840 could be any iteration from the first up to the `unroll_number-1'th, 841 depending on the initial value of the iteration variable. Therefore 842 we can not split induction variables here, because we can not calculate 843 their value. Hence, this code must occur before find_splittable_regs 844 is called. */ 845 846 if (unroll_type == UNROLL_NAIVE && ! 
splitting_not_safe && strength_reduce_p) 847 { 848 rtx initial_value, final_value, increment; 849 enum machine_mode mode; 850 851 if (precondition_loop_p (loop, 852 &initial_value, &final_value, &increment, 853 &mode)) 854 { 855 rtx diff, insn; 856 rtx *labels; 857 int abs_inc, neg_inc; 858 enum rtx_code cc = loop_info->comparison_code; 859 int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU); 860 int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU); 861 862 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx)); 863 864 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum, 865 "unroll_loop_precondition"); 866 global_const_equiv_varray = map->const_equiv_varray; 867 868 init_reg_map (map, maxregnum); 869 870 /* Limit loop unrolling to 4, since this will make 7 copies of 871 the loop body. */ 872 if (unroll_number > 4) 873 unroll_number = 4; 874 875 /* Save the absolute value of the increment, and also whether or 876 not it is negative. */ 877 neg_inc = 0; 878 abs_inc = INTVAL (increment); 879 if (abs_inc < 0) 880 { 881 abs_inc = -abs_inc; 882 neg_inc = 1; 883 } 884 885 start_sequence (); 886 887 /* We must copy the final and initial values here to avoid 888 improperly shared rtl. */ 889 final_value = copy_rtx (final_value); 890 initial_value = copy_rtx (initial_value); 891 892 /* Final value may have form of (PLUS val1 const1_rtx). We need 893 to convert it into general operand, so compute the real value. */ 894 895 final_value = force_operand (final_value, NULL_RTX); 896 if (!nonmemory_operand (final_value, VOIDmode)) 897 final_value = force_reg (mode, final_value); 898 899 /* Calculate the difference between the final and initial values. 900 Final value may be a (plus (reg x) (const_int 1)) rtx. 901 902 We have to deal with for (i = 0; --i < 6;) type loops. 903 For such loops the real final value is the first time the 904 loop variable overflows, so the diff we calculate is the 905 distance from the overflow value. 
This is 0 or ~0 for 906 unsigned loops depending on the direction, or INT_MAX, 907 INT_MAX+1 for signed loops. We really do not need the 908 exact value, since we are only interested in the diff 909 modulo the increment, and the increment is a power of 2, 910 so we can pretend that the overflow value is 0/~0. */ 911 912 if (cc == NE || less_p != neg_inc) 913 diff = simplify_gen_binary (MINUS, mode, final_value, 914 initial_value); 915 else 916 diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode, 917 initial_value, mode); 918 diff = force_operand (diff, NULL_RTX); 919 920 /* Now calculate (diff % (unroll * abs (increment))) by using an 921 and instruction. */ 922 diff = simplify_gen_binary (AND, mode, diff, 923 GEN_INT (unroll_number*abs_inc - 1)); 924 diff = force_operand (diff, NULL_RTX); 925 926 /* Now emit a sequence of branches to jump to the proper precond 927 loop entry point. */ 928 929 labels = (rtx *) xmalloc (sizeof (rtx) * unroll_number); 930 for (i = 0; i < unroll_number; i++) 931 labels[i] = gen_label_rtx (); 932 933 /* Check for the case where the initial value is greater than or 934 equal to the final value. In that case, we want to execute 935 exactly one loop iteration. The code below will fail for this 936 case. This check does not apply if the loop has a NE 937 comparison at the end. */ 938 939 if (cc != NE) 940 { 941 rtx incremented_initval; 942 enum rtx_code cmp_code; 943 944 incremented_initval 945 = simplify_gen_binary (PLUS, mode, initial_value, increment); 946 incremented_initval 947 = force_operand (incremented_initval, NULL_RTX); 948 949 cmp_code = (less_p 950 ? (unsigned_p ? GEU : GE) 951 : (unsigned_p ? 
LEU : LE)); 952 953 insn = simplify_cmp_and_jump_insns (cmp_code, mode, 954 incremented_initval, 955 final_value, labels[1]); 956 if (insn) 957 predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN); 958 } 959 960 /* Assuming the unroll_number is 4, and the increment is 2, then 961 for a negative increment: for a positive increment: 962 diff = 0,1 precond 0 diff = 0,7 precond 0 963 diff = 2,3 precond 3 diff = 1,2 precond 1 964 diff = 4,5 precond 2 diff = 3,4 precond 2 965 diff = 6,7 precond 1 diff = 5,6 precond 3 */ 966 967 /* We only need to emit (unroll_number - 1) branches here, the 968 last case just falls through to the following code. */ 969 970 /* ??? This would give better code if we emitted a tree of branches 971 instead of the current linear list of branches. */ 972 973 for (i = 0; i < unroll_number - 1; i++) 974 { 975 int cmp_const; 976 enum rtx_code cmp_code; 977 978 /* For negative increments, must invert the constant compared 979 against, except when comparing against zero. */ 980 if (i == 0) 981 { 982 cmp_const = 0; 983 cmp_code = EQ; 984 } 985 else if (neg_inc) 986 { 987 cmp_const = unroll_number - i; 988 cmp_code = GE; 989 } 990 else 991 { 992 cmp_const = i; 993 cmp_code = LE; 994 } 995 996 insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff, 997 GEN_INT (abs_inc*cmp_const), 998 labels[i]); 999 if (insn) 1000 predict_insn (insn, PRED_LOOP_PRECONDITIONING, 1001 REG_BR_PROB_BASE / (unroll_number - i)); 1002 } 1003 1004 /* If the increment is greater than one, then we need another branch, 1005 to handle other cases equivalent to 0. */ 1006 1007 /* ??? This should be merged into the code above somehow to help 1008 simplify the code here, and reduce the number of branches emitted. 1009 For the negative increment case, the branch here could easily 1010 be merged with the `0' case branch above. For the positive 1011 increment case, it is not clear how this can be simplified. 
*/ 1012 1013 if (abs_inc != 1) 1014 { 1015 int cmp_const; 1016 enum rtx_code cmp_code; 1017 1018 if (neg_inc) 1019 { 1020 cmp_const = abs_inc - 1; 1021 cmp_code = LE; 1022 } 1023 else 1024 { 1025 cmp_const = abs_inc * (unroll_number - 1) + 1; 1026 cmp_code = GE; 1027 } 1028 1029 simplify_cmp_and_jump_insns (cmp_code, mode, diff, 1030 GEN_INT (cmp_const), labels[0]); 1031 } 1032 1033 sequence = get_insns (); 1034 end_sequence (); 1035 loop_insn_hoist (loop, sequence); 1036 1037 /* Only the last copy of the loop body here needs the exit 1038 test, so set copy_end to exclude the compare/branch here, 1039 and then reset it inside the loop when get to the last 1040 copy. */ 1041 1042 if (GET_CODE (last_loop_insn) == BARRIER) 1043 copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); 1044 else if (GET_CODE (last_loop_insn) == JUMP_INSN) 1045 { 1046 copy_end = PREV_INSN (last_loop_insn); 1047 #ifdef HAVE_cc0 1048 /* The immediately preceding insn may be a compare which 1049 we do not want to copy. 
*/ 1050 if (sets_cc0_p (PREV_INSN (copy_end))) 1051 copy_end = PREV_INSN (copy_end); 1052 #endif 1053 } 1054 else 1055 abort (); 1056 1057 for (i = 1; i < unroll_number; i++) 1058 { 1059 emit_label_after (labels[unroll_number - i], 1060 PREV_INSN (loop_start)); 1061 1062 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx)); 1063 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 1064 0, (VARRAY_SIZE (map->const_equiv_varray) 1065 * sizeof (struct const_equiv_data))); 1066 map->const_age = 0; 1067 1068 for (j = 0; j < max_labelno; j++) 1069 if (local_label[j]) 1070 set_label_in_map (map, j, gen_label_rtx ()); 1071 1072 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) 1073 if (local_regno[r]) 1074 { 1075 map->reg_map[r] 1076 = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); 1077 record_base_value (REGNO (map->reg_map[r]), 1078 regno_reg_rtx[r], 0); 1079 } 1080 /* The last copy needs the compare/branch insns at the end, 1081 so reset copy_end here if the loop ends with a conditional 1082 branch. */ 1083 1084 if (i == unroll_number - 1) 1085 { 1086 if (GET_CODE (last_loop_insn) == BARRIER) 1087 copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); 1088 else 1089 copy_end = last_loop_insn; 1090 } 1091 1092 /* None of the copies are the `last_iteration', so just 1093 pass zero for that parameter. */ 1094 copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0, 1095 unroll_type, start_label, loop_end, 1096 loop_start, copy_end); 1097 } 1098 emit_label_after (labels[0], PREV_INSN (loop_start)); 1099 1100 if (GET_CODE (last_loop_insn) == BARRIER) 1101 { 1102 insert_before = PREV_INSN (last_loop_insn); 1103 copy_end = PREV_INSN (insert_before); 1104 } 1105 else 1106 { 1107 insert_before = last_loop_insn; 1108 #ifdef HAVE_cc0 1109 /* The instruction immediately before the JUMP_INSN may 1110 be a compare instruction which we do not want to copy 1111 or delete. 
*/ 1112 if (sets_cc0_p (PREV_INSN (insert_before))) 1113 insert_before = PREV_INSN (insert_before); 1114 #endif 1115 copy_end = PREV_INSN (insert_before); 1116 } 1117 1118 /* Set unroll type to MODULO now. */ 1119 unroll_type = UNROLL_MODULO; 1120 loop_preconditioned = 1; 1121 1122 /* Preconditioning changes the loop's initial value. We set 1123 it to an unknown value so that doloop_optimize won't get 1124 confused. */ 1125 loop_info->initial_value = 0; 1126 1127 /* Clean up. */ 1128 free (labels); 1129 } 1130 } 1131 1132 /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll 1133 the loop unless all loops are being unrolled. */ 1134 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops) 1135 { 1136 if (loop_dump_stream) 1137 fprintf (loop_dump_stream, 1138 "Unrolling failure: Naive unrolling not being done.\n"); 1139 goto egress; 1140 } 1141 1142 /* At this point, we are guaranteed to unroll the loop. */ 1143 1144 /* Keep track of the unroll factor for the loop. */ 1145 loop_info->unroll_number = unroll_number; 1146 1147 /* And whether the loop has been preconditioned. */ 1148 loop_info->preconditioned = loop_preconditioned; 1149 1150 /* Remember whether it was preconditioned for the second loop pass. */ 1151 NOTE_PRECONDITIONED (loop->end) = loop_preconditioned; 1152 1153 /* For each biv and giv, determine whether it can be safely split into 1154 a different variable for each unrolled copy of the loop body. 1155 We precalculate and save this info here, since computing it is 1156 expensive. 1157 1158 Do this before deleting any instructions from the loop, so that 1159 back_branch_in_range_p will work correctly. */ 1160 1161 if (splitting_not_safe) 1162 temp = 0; 1163 else 1164 temp = find_splittable_regs (loop, unroll_type, unroll_number); 1165 1166 /* find_splittable_regs may have created some new registers, so must 1167 reallocate the reg_map with the new larger size, and must realloc 1168 the constant maps also. 
*/ 1169 1170 maxregnum = max_reg_num (); 1171 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx)); 1172 1173 init_reg_map (map, maxregnum); 1174 1175 if (map->const_equiv_varray == 0) 1176 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, 1177 maxregnum + temp * unroll_number * 2, 1178 "unroll_loop"); 1179 global_const_equiv_varray = map->const_equiv_varray; 1180 1181 /* Search the list of bivs and givs to find ones which need to be remapped 1182 when split, and set their reg_map entry appropriately. */ 1183 1184 for (bl = ivs->list; bl; bl = bl->next) 1185 { 1186 if (REGNO (bl->biv->src_reg) != bl->regno) 1187 map->reg_map[bl->regno] = bl->biv->src_reg; 1188 #if 0 1189 /* Currently, non-reduced/final-value givs are never split. */ 1190 for (v = bl->giv; v; v = v->next_iv) 1191 if (REGNO (v->src_reg) != bl->regno) 1192 map->reg_map[REGNO (v->dest_reg)] = v->src_reg; 1193 #endif 1194 } 1195 1196 /* Use our current register alignment and pointer flags. */ 1197 map->regno_pointer_align = cfun->emit->regno_pointer_align; 1198 map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx; 1199 1200 /* If the loop is being partially unrolled, and the iteration variables 1201 are being split, and are being renamed for the split, then must fix up 1202 the compare/jump instruction at the end of the loop to refer to the new 1203 registers. This compare isn't copied, so the registers used in it 1204 will never be replaced if it isn't done here. */ 1205 1206 if (unroll_type == UNROLL_MODULO) 1207 { 1208 insn = NEXT_INSN (copy_end); 1209 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) 1210 PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn)); 1211 } 1212 1213 /* For unroll_number times, make a copy of each instruction 1214 between copy_start and copy_end, and insert these new instructions 1215 before the end of the loop. 
*/ 1216 1217 for (i = 0; i < unroll_number; i++) 1218 { 1219 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx)); 1220 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0, 1221 VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data)); 1222 map->const_age = 0; 1223 1224 for (j = 0; j < max_labelno; j++) 1225 if (local_label[j]) 1226 set_label_in_map (map, j, gen_label_rtx ()); 1227 1228 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++) 1229 if (local_regno[r]) 1230 { 1231 map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r])); 1232 record_base_value (REGNO (map->reg_map[r]), 1233 regno_reg_rtx[r], 0); 1234 } 1235 1236 /* If loop starts with a branch to the test, then fix it so that 1237 it points to the test of the first unrolled copy of the loop. */ 1238 if (i == 0 && loop_start != copy_start) 1239 { 1240 insn = PREV_INSN (copy_start); 1241 pattern = PATTERN (insn); 1242 1243 tem = get_label_from_map (map, 1244 CODE_LABEL_NUMBER 1245 (XEXP (SET_SRC (pattern), 0))); 1246 SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem); 1247 1248 /* Set the jump label so that it can be used by later loop unrolling 1249 passes. */ 1250 JUMP_LABEL (insn) = tem; 1251 LABEL_NUSES (tem)++; 1252 } 1253 1254 copy_loop_body (loop, copy_start, copy_end, map, exit_label, 1255 i == unroll_number - 1, unroll_type, start_label, 1256 loop_end, insert_before, insert_before); 1257 } 1258 1259 /* Before deleting any insns, emit a CODE_LABEL immediately after the last 1260 insn to be deleted. This prevents any runaway delete_insn call from 1261 more insns that it should, as it always stops at a CODE_LABEL. */ 1262 1263 /* Delete the compare and branch at the end of the loop if completely 1264 unrolling the loop. Deleting the backward branch at the end also 1265 deletes the code label at the start of the loop. This is done at 1266 the very end to avoid problems with back_branch_in_range_p. 
*/ 1267 1268 if (unroll_type == UNROLL_COMPLETELY) 1269 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn); 1270 else 1271 safety_label = emit_label_after (gen_label_rtx (), copy_end); 1272 1273 /* Delete all of the original loop instructions. Don't delete the 1274 LOOP_BEG note, or the first code label in the loop. */ 1275 1276 insn = NEXT_INSN (copy_start); 1277 while (insn != safety_label) 1278 { 1279 /* ??? Don't delete named code labels. They will be deleted when the 1280 jump that references them is deleted. Otherwise, we end up deleting 1281 them twice, which causes them to completely disappear instead of turn 1282 into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in 1283 dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files 1284 to handle deleted labels instead. Or perhaps fix DECL_RTL of the 1285 associated LABEL_DECL to point to one of the new label instances. */ 1286 /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */ 1287 if (insn != start_label 1288 && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn)) 1289 && ! (GET_CODE (insn) == NOTE 1290 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)) 1291 insn = delete_related_insns (insn); 1292 else 1293 insn = NEXT_INSN (insn); 1294 } 1295 1296 /* Can now delete the 'safety' label emitted to protect us from runaway 1297 delete_related_insns calls. */ 1298 if (INSN_DELETED_P (safety_label)) 1299 abort (); 1300 delete_related_insns (safety_label); 1301 1302 /* If exit_label exists, emit it after the loop. Doing the emit here 1303 forces it to have a higher INSN_UID than any insn in the unrolled loop. 1304 This is needed so that mostly_true_jump in reorg.c will treat jumps 1305 to this loop end label correctly, i.e. predict that they are usually 1306 not taken. 
*/ 1307 if (exit_label) 1308 emit_label_after (exit_label, loop_end); 1309 1310 egress: 1311 if (unroll_type == UNROLL_COMPLETELY) 1312 { 1313 /* Remove the loop notes since this is no longer a loop. */ 1314 if (loop->vtop) 1315 delete_related_insns (loop->vtop); 1316 if (loop->cont) 1317 delete_related_insns (loop->cont); 1318 if (loop_start) 1319 delete_related_insns (loop_start); 1320 if (loop_end) 1321 delete_related_insns (loop_end); 1322 } 1323 1324 if (map->const_equiv_varray) 1325 VARRAY_FREE (map->const_equiv_varray); 1326 if (map->label_map) 1327 { 1328 free (map->label_map); 1329 free (local_label); 1330 } 1331 free (map->insn_map); 1332 free (splittable_regs); 1333 free (splittable_regs_updates); 1334 free (addr_combined_regs); 1335 free (local_regno); 1336 if (map->reg_map) 1337 free (map->reg_map); 1338 free (map); 1339 } 1340 1341 /* A helper function for unroll_loop. Emit a compare and branch to 1342 satisfy (CMP OP1 OP2), but pass this through the simplifier first. 1343 If the branch turned out to be conditional, return it, otherwise 1344 return NULL. 
*/ 1345 1346 static rtx 1347 simplify_cmp_and_jump_insns (code, mode, op0, op1, label) 1348 enum rtx_code code; 1349 enum machine_mode mode; 1350 rtx op0, op1, label; 1351 { 1352 rtx t, insn; 1353 1354 t = simplify_relational_operation (code, mode, op0, op1); 1355 if (!t) 1356 { 1357 enum rtx_code scode = signed_condition (code); 1358 emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode, 1359 code != scode, label); 1360 insn = get_last_insn (); 1361 1362 JUMP_LABEL (insn) = label; 1363 LABEL_NUSES (label) += 1; 1364 1365 return insn; 1366 } 1367 else if (t == const_true_rtx) 1368 { 1369 insn = emit_jump_insn (gen_jump (label)); 1370 emit_barrier (); 1371 JUMP_LABEL (insn) = label; 1372 LABEL_NUSES (label) += 1; 1373 } 1374 1375 return NULL_RTX; 1376 } 1377 1378 /* Return true if the loop can be safely, and profitably, preconditioned 1379 so that the unrolled copies of the loop body don't need exit tests. 1380 1381 This only works if final_value, initial_value and increment can be 1382 determined, and if increment is a constant power of 2. 1383 If increment is not a power of 2, then the preconditioning modulo 1384 operation would require a real modulo instead of a boolean AND, and this 1385 is not considered `profitable'. */ 1386 1387 /* ??? If the loop is known to be executed very many times, or the machine 1388 has a very cheap divide instruction, then preconditioning is a win even 1389 when the increment is not a power of 2. Use RTX_COST to compute 1390 whether divide is cheap. 1391 ??? A divide by constant doesn't actually need a divide, look at 1392 expand_divmod. The reduced cost of this optimized modulo is not 1393 reflected in RTX_COST. 
*/ 1394 1395 int 1396 precondition_loop_p (loop, initial_value, final_value, increment, mode) 1397 const struct loop *loop; 1398 rtx *initial_value, *final_value, *increment; 1399 enum machine_mode *mode; 1400 { 1401 rtx loop_start = loop->start; 1402 struct loop_info *loop_info = LOOP_INFO (loop); 1403 1404 if (loop_info->n_iterations > 0) 1405 { 1406 if (INTVAL (loop_info->increment) > 0) 1407 { 1408 *initial_value = const0_rtx; 1409 *increment = const1_rtx; 1410 *final_value = GEN_INT (loop_info->n_iterations); 1411 } 1412 else 1413 { 1414 *initial_value = GEN_INT (loop_info->n_iterations); 1415 *increment = constm1_rtx; 1416 *final_value = const0_rtx; 1417 } 1418 *mode = word_mode; 1419 1420 if (loop_dump_stream) 1421 { 1422 fputs ("Preconditioning: Success, number of iterations known, ", 1423 loop_dump_stream); 1424 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, 1425 loop_info->n_iterations); 1426 fputs (".\n", loop_dump_stream); 1427 } 1428 return 1; 1429 } 1430 1431 if (loop_info->iteration_var == 0) 1432 { 1433 if (loop_dump_stream) 1434 fprintf (loop_dump_stream, 1435 "Preconditioning: Could not find iteration variable.\n"); 1436 return 0; 1437 } 1438 else if (loop_info->initial_value == 0) 1439 { 1440 if (loop_dump_stream) 1441 fprintf (loop_dump_stream, 1442 "Preconditioning: Could not find initial value.\n"); 1443 return 0; 1444 } 1445 else if (loop_info->increment == 0) 1446 { 1447 if (loop_dump_stream) 1448 fprintf (loop_dump_stream, 1449 "Preconditioning: Could not find increment value.\n"); 1450 return 0; 1451 } 1452 else if (GET_CODE (loop_info->increment) != CONST_INT) 1453 { 1454 if (loop_dump_stream) 1455 fprintf (loop_dump_stream, 1456 "Preconditioning: Increment not a constant.\n"); 1457 return 0; 1458 } 1459 else if ((exact_log2 (INTVAL (loop_info->increment)) < 0) 1460 && (exact_log2 (-INTVAL (loop_info->increment)) < 0)) 1461 { 1462 if (loop_dump_stream) 1463 fprintf (loop_dump_stream, 1464 "Preconditioning: Increment not a constant 
power of 2.\n"); 1465 return 0; 1466 } 1467 1468 /* Unsigned_compare and compare_dir can be ignored here, since they do 1469 not matter for preconditioning. */ 1470 1471 if (loop_info->final_value == 0) 1472 { 1473 if (loop_dump_stream) 1474 fprintf (loop_dump_stream, 1475 "Preconditioning: EQ comparison loop.\n"); 1476 return 0; 1477 } 1478 1479 /* Must ensure that final_value is invariant, so call 1480 loop_invariant_p to check. Before doing so, must check regno 1481 against max_reg_before_loop to make sure that the register is in 1482 the range covered by loop_invariant_p. If it isn't, then it is 1483 most likely a biv/giv which by definition are not invariant. */ 1484 if ((GET_CODE (loop_info->final_value) == REG 1485 && REGNO (loop_info->final_value) >= max_reg_before_loop) 1486 || (GET_CODE (loop_info->final_value) == PLUS 1487 && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop) 1488 || ! loop_invariant_p (loop, loop_info->final_value)) 1489 { 1490 if (loop_dump_stream) 1491 fprintf (loop_dump_stream, 1492 "Preconditioning: Final value not invariant.\n"); 1493 return 0; 1494 } 1495 1496 /* Fail for floating point values, since the caller of this function 1497 does not have code to deal with them. */ 1498 if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT 1499 || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT) 1500 { 1501 if (loop_dump_stream) 1502 fprintf (loop_dump_stream, 1503 "Preconditioning: Floating point final or initial value.\n"); 1504 return 0; 1505 } 1506 1507 /* Fail if loop_info->iteration_var is not live before loop_start, 1508 since we need to test its value in the preconditioning code. 
*/ 1509 1510 if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var)) 1511 > INSN_LUID (loop_start)) 1512 { 1513 if (loop_dump_stream) 1514 fprintf (loop_dump_stream, 1515 "Preconditioning: Iteration var not live before loop start.\n"); 1516 return 0; 1517 } 1518 1519 /* Note that loop_iterations biases the initial value for GIV iterators 1520 such as "while (i-- > 0)" so that we can calculate the number of 1521 iterations just like for BIV iterators. 1522 1523 Also note that the absolute values of initial_value and 1524 final_value are unimportant as only their difference is used for 1525 calculating the number of loop iterations. */ 1526 *initial_value = loop_info->initial_value; 1527 *increment = loop_info->increment; 1528 *final_value = loop_info->final_value; 1529 1530 /* Decide what mode to do these calculations in. Choose the larger 1531 of final_value's mode and initial_value's mode, or a full-word if 1532 both are constants. */ 1533 *mode = GET_MODE (*final_value); 1534 if (*mode == VOIDmode) 1535 { 1536 *mode = GET_MODE (*initial_value); 1537 if (*mode == VOIDmode) 1538 *mode = word_mode; 1539 } 1540 else if (*mode != GET_MODE (*initial_value) 1541 && (GET_MODE_SIZE (*mode) 1542 < GET_MODE_SIZE (GET_MODE (*initial_value)))) 1543 *mode = GET_MODE (*initial_value); 1544 1545 /* Success! */ 1546 if (loop_dump_stream) 1547 fprintf (loop_dump_stream, "Preconditioning: Successful.\n"); 1548 return 1; 1549 } 1550 1551 /* All pseudo-registers must be mapped to themselves. Two hard registers 1552 must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_ 1553 REGNUM, to avoid function-inlining specific conversions of these 1554 registers. All other hard regs can not be mapped because they may be 1555 used with different 1556 modes. 
*/

static void
init_reg_map (map, maxregnum)
     struct inline_remap *map;
     int maxregnum;
{
  int i;

  /* Every pseudo (above the virtual registers) maps to itself.  */
  for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
    map->reg_map[i] = regno_reg_rtx[i];
  /* Just clear the rest of the entries.  */
  for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
    map->reg_map[i] = 0;

  /* Except for these two virtual hard registers, which must map to
     themselves to avoid inlining-specific conversions (see the comment
     above this function).  */
  map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
    = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
  map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
    = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
}

/* Strength-reduction will often emit code for optimized biv/givs which
   calculates their value in a temporary register, and then copies the result
   to the iv.  This procedure reconstructs the pattern computing the iv;
   verifying that all operands are of the proper form.

   PATTERN must be the result of single_set.
   The return value is the amount that the giv is incremented by.

   NOTE(review): this routine walks *backwards* from SRC_INSN through the
   insns that strength reduction just emitted, and deletes the trailing
   insns it consumes via delete_related_insns (get_last_insn ()) — it
   therefore assumes it is called while those insns are still the last
   ones emitted.  */

static rtx
calculate_giv_inc (pattern, src_insn, regno)
     rtx pattern, src_insn;
     unsigned int regno;
{
  rtx increment;
  rtx increment_total = 0;
  int tries = 0;

 retry:
  /* Verify that we have an increment insn here.  First check for a plus
     as the set source.  */
  if (GET_CODE (SET_SRC (pattern)) != PLUS)
    {
      /* SR sometimes computes the new giv value in a temp, then copies it
	 to the new_reg.  */
      src_insn = PREV_INSN (src_insn);
      pattern = single_set (src_insn);
      if (GET_CODE (SET_SRC (pattern)) != PLUS)
	abort ();

      /* The last insn emitted is not needed, so delete it to avoid confusing
	 the second cse pass.  This insn sets the giv unnecessarily.  */
      delete_related_insns (get_last_insn ());
    }

  /* Verify that we have a constant as the second operand of the plus.  */
  increment = XEXP (SET_SRC (pattern), 1);
  if (GET_CODE (increment) != CONST_INT)
    {
      /* SR sometimes puts the constant in a register, especially if it is
	 too big to be an add immed operand.  */
      increment = find_last_value (increment, &src_insn, NULL_RTX, 0);

      /* SR may have used LO_SUM to compute the constant if it is too large
	 for a load immed operand.  In this case, the constant is in operand
	 one of the LO_SUM rtx.  */
      if (GET_CODE (increment) == LO_SUM)
	increment = XEXP (increment, 1);

      /* Some ports store large constants in memory and add a REG_EQUAL
	 note to the store insn.  */
      else if (GET_CODE (increment) == MEM)
	{
	  rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
	  if (note)
	    increment = XEXP (note, 0);
	}

      else if (GET_CODE (increment) == IOR
	       || GET_CODE (increment) == PLUS
	       || GET_CODE (increment) == ASHIFT
	       || GET_CODE (increment) == LSHIFTRT)
	{
	  /* The rs6000 port loads some constants with IOR.
	     The alpha port loads some constants with ASHIFT and PLUS.
	     The sparc64 port loads some constants with LSHIFTRT.  */
	  rtx second_part = XEXP (increment, 1);
	  enum rtx_code code = GET_CODE (increment);

	  /* Find the constant feeding operand 0, then fold the two
	     halves back together below.  */
	  increment = find_last_value (XEXP (increment, 0),
				       &src_insn, NULL_RTX, 0);
	  /* Don't need the last insn anymore.  */
	  delete_related_insns (get_last_insn ());

	  if (GET_CODE (second_part) != CONST_INT
	      || GET_CODE (increment) != CONST_INT)
	    abort ();

	  if (code == IOR)
	    increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
	  else if (code == PLUS)
	    increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
	  else if (code == ASHIFT)
	    increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
	  else
	    increment = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (increment) >> INTVAL (second_part));
	}

      /* Whatever form it took, by now it must have folded to a plain
	 integer constant.  */
      if (GET_CODE (increment) != CONST_INT)
	abort ();

      /* The insn loading the constant into a register is no longer needed,
	 so delete it.  */
      delete_related_insns (get_last_insn ());
    }

  /* Accumulate across a possible second add insn (see the retry path
     below).  */
  if (increment_total)
    increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
  else
    increment_total = increment;

  /* Check that the source register is the same as the register we expected
     to see as the source.  If not, something is seriously wrong.  */
  if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
      || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
    {
      /* Some machines (e.g. the romp), may emit two add instructions for
	 certain constants, so lets try looking for another add immediately
	 before this one if we have only seen one add insn so far.  */

      if (tries == 0)
	{
	  tries++;

	  src_insn = PREV_INSN (src_insn);
	  pattern = single_set (src_insn);

	  delete_related_insns (get_last_insn ());

	  goto retry;
	}

      abort ();
    }

  return increment_total;
}

/* Copy REG_NOTES, except for insn references, because not all insn_map
   entries are valid yet.  We do need to copy registers now though, because
   the reg_map entries can change during copying.
*/ 1707 1708 static rtx 1709 initial_reg_note_copy (notes, map) 1710 rtx notes; 1711 struct inline_remap *map; 1712 { 1713 rtx copy; 1714 1715 if (notes == 0) 1716 return 0; 1717 1718 copy = rtx_alloc (GET_CODE (notes)); 1719 PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes)); 1720 1721 if (GET_CODE (notes) == EXPR_LIST) 1722 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0); 1723 else if (GET_CODE (notes) == INSN_LIST) 1724 /* Don't substitute for these yet. */ 1725 XEXP (copy, 0) = copy_rtx (XEXP (notes, 0)); 1726 else 1727 abort (); 1728 1729 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map); 1730 1731 return copy; 1732 } 1733 1734 /* Fixup insn references in copied REG_NOTES. */ 1735 1736 static void 1737 final_reg_note_copy (notesp, map) 1738 rtx *notesp; 1739 struct inline_remap *map; 1740 { 1741 while (*notesp) 1742 { 1743 rtx note = *notesp; 1744 1745 if (GET_CODE (note) == INSN_LIST) 1746 { 1747 /* Sometimes, we have a REG_WAS_0 note that points to a 1748 deleted instruction. In that case, we can just delete the 1749 note. */ 1750 if (REG_NOTE_KIND (note) == REG_WAS_0) 1751 { 1752 *notesp = XEXP (note, 1); 1753 continue; 1754 } 1755 else 1756 { 1757 rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))]; 1758 1759 /* If we failed to remap the note, something is awry. 1760 Allow REG_LABEL as it may reference label outside 1761 the unrolled loop. */ 1762 if (!insn) 1763 { 1764 if (REG_NOTE_KIND (note) != REG_LABEL) 1765 abort (); 1766 } 1767 else 1768 XEXP (note, 0) = insn; 1769 } 1770 } 1771 1772 notesp = &XEXP (note, 1); 1773 } 1774 } 1775 1776 /* Copy each instruction in the loop, substituting from map as appropriate. 1777 This is very similar to a loop in expand_inline_function. 
*/

/* COPY_START/COPY_END delimit the insns to copy: the first insn processed
   is NEXT_INSN (COPY_START) and the last is COPY_END.  MAP carries the
   register, label and insn remappings used for this copy.  EXIT_LABEL is
   the branch target used when the copied backward branch must be turned
   into a branch out of the loop.  LAST_ITERATION is nonzero for the final
   copy, in which case the original start label and iteration registers
   are reused so the loop exits with the proper values.  The copied insns
   are built in a sequence and finally emitted before INSERT_BEFORE.
   COPY_NOTES_FROM marks where trailing notes are copied from (see the
   comment near the end of this function).  */

static void
copy_loop_body (loop, copy_start, copy_end, map, exit_label, last_iteration,
		unroll_type, start_label, loop_end, insert_before,
		copy_notes_from)
     struct loop *loop;
     rtx copy_start, copy_end;
     struct inline_remap *map;
     rtx exit_label;
     int last_iteration;
     enum unroll_types unroll_type;
     rtx start_label, loop_end, insert_before, copy_notes_from;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  rtx insn, pattern;
  rtx set, tem, copy = NULL_RTX;
  int dest_reg_was_split, i;
#ifdef HAVE_cc0
  rtx cc0_insn = 0;
#endif
  rtx final_label = 0;
  rtx giv_inc, giv_dest_reg, giv_src_reg;

  /* If this isn't the last iteration, then map any references to the
     start_label to final_label.  Final label will then be emitted immediately
     after the end of this loop body if it was ever used.

     If this is the last iteration, then map references to the start_label
     to itself.  */
  if (! last_iteration)
    {
      final_label = gen_label_rtx ();
      set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label);
    }
  else
    set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);

  start_sequence ();

  /* Process the half-open range (COPY_START, COPY_END].  */
  insn = copy_start;
  do
    {
      insn = NEXT_INSN (insn);

      map->orig_asm_operands_vector = 0;

      switch (GET_CODE (insn))
	{
	case INSN:
	  pattern = PATTERN (insn);
	  copy = 0;
	  giv_inc = 0;

	  /* Check to see if this is a giv that has been combined with
	     some split address givs.  (Combined in the sense that
	     `combine_givs' in loop.c has put two givs in the same register.)
	     In this case, we must search all givs based on the same biv to
	     find the address givs.  Then split the address givs.
	     Do this before splitting the giv, since that may map the
	     SET_DEST to a new register.  */

	  if ((set = single_set (insn))
	      && GET_CODE (SET_DEST (set)) == REG
	      && addr_combined_regs[REGNO (SET_DEST (set))])
	    {
	      struct iv_class *bl;
	      struct induction *v, *tv;
	      unsigned int regno = REGNO (SET_DEST (set));

	      v = addr_combined_regs[REGNO (SET_DEST (set))];
	      bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

	      /* Although the giv_inc amount is not needed here, we must call
		 calculate_giv_inc here since it might try to delete the
		 last insn emitted.  If we wait until later to call it,
		 we might accidentally delete insns generated immediately
		 below by emit_unrolled_add.  */

	      giv_inc = calculate_giv_inc (set, insn, regno);

	      /* Now find all address giv's that were combined with this
		 giv 'v'.  */
	      for (tv = bl->giv; tv; tv = tv->next_iv)
		if (tv->giv_type == DEST_ADDR && tv->same == v)
		  {
		    int this_giv_inc;

		    /* If this DEST_ADDR giv was not split, then ignore it.  */
		    if (*tv->location != tv->dest_reg)
		      continue;

		    /* Scale this_giv_inc if the multiplicative factors of
		       the two givs are different.  */
		    this_giv_inc = INTVAL (giv_inc);
		    if (tv->mult_val != v->mult_val)
		      this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
				      * INTVAL (tv->mult_val));

		    tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
		    *tv->location = tv->dest_reg;

		    if (last_iteration && unroll_type != UNROLL_COMPLETELY)
		      {
			/* Must emit an insn to increment the split address
			   giv.  Add in the const_adjust field in case there
			   was a constant eliminated from the address.  */
			rtx value, dest_reg;

			/* tv->dest_reg will be either a bare register,
			   or else a register plus a constant.  */
			if (GET_CODE (tv->dest_reg) == REG)
			  dest_reg = tv->dest_reg;
			else
			  dest_reg = XEXP (tv->dest_reg, 0);

			/* Check for shared address givs, and avoid
			   incrementing the shared pseudo reg more than
			   once.  */
			if (! tv->same_insn && ! tv->shared)
			  {
			    /* tv->dest_reg may actually be a (PLUS (REG)
			       (CONST)) here, so we must call plus_constant
			       to add the const_adjust amount before calling
			       emit_unrolled_add below.  */
			    value = plus_constant (tv->dest_reg,
						   tv->const_adjust);

			    if (GET_CODE (value) == PLUS)
			      {
				/* The constant could be too large for an add
				   immediate, so can't directly emit an insn
				   here.  */
				emit_unrolled_add (dest_reg, XEXP (value, 0),
						   XEXP (value, 1));
			      }
			  }

			/* Reset the giv to be just the register again, in case
			   it is used after the set we have just emitted.
			   We must subtract the const_adjust factor added in
			   above.  */
			tv->dest_reg = plus_constant (dest_reg,
						      -tv->const_adjust);
			*tv->location = tv->dest_reg;
		      }
		  }
	    }

	  /* If this is a setting of a splittable variable, then determine
	     how to split the variable, create a new set based on this split,
	     and set up the reg_map so that later uses of the variable will
	     use the new split variable.  */

	  dest_reg_was_split = 0;

	  if ((set = single_set (insn))
	      && GET_CODE (SET_DEST (set)) == REG
	      && splittable_regs[REGNO (SET_DEST (set))])
	    {
	      unsigned int regno = REGNO (SET_DEST (set));
	      unsigned int src_regno;

	      dest_reg_was_split = 1;

	      giv_dest_reg = SET_DEST (set);
	      giv_src_reg = giv_dest_reg;
	      /* Compute the increment value for the giv, if it wasn't
		 already computed above.  */
	      if (giv_inc == 0)
		giv_inc = calculate_giv_inc (set, insn, regno);

	      src_regno = REGNO (giv_src_reg);

	      if (unroll_type == UNROLL_COMPLETELY)
		{
		  /* Completely unrolling the loop.  Set the induction
		     variable to a known constant value.  */

		  /* The value in splittable_regs may be an invariant
		     value, so we must use plus_constant here.  */
		  splittable_regs[regno]
		    = plus_constant (splittable_regs[src_regno],
				     INTVAL (giv_inc));

		  if (GET_CODE (splittable_regs[regno]) == PLUS)
		    {
		      giv_src_reg = XEXP (splittable_regs[regno], 0);
		      giv_inc = XEXP (splittable_regs[regno], 1);
		    }
		  else
		    {
		      /* The splittable_regs value must be a REG or a
			 CONST_INT, so put the entire value in the giv_src_reg
			 variable.  */
		      giv_src_reg = splittable_regs[regno];
		      giv_inc = const0_rtx;
		    }
		}
	      else
		{
		  /* Partially unrolling loop.  Create a new pseudo
		     register for the iteration variable, and set it to
		     be a constant plus the original register.  Except
		     on the last iteration, when the result has to
		     go back into the original iteration var register.  */

		  /* Handle bivs which must be mapped to a new register
		     when split.  This happens for bivs which need their
		     final value set before loop entry.  The new register
		     for the biv was stored in the biv's first struct
		     induction entry by find_splittable_regs.  */

		  if (regno < ivs->n_regs
		      && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT)
		    {
		      giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg;
		      giv_dest_reg = giv_src_reg;
		    }

#if 0
		  /* If non-reduced/final-value givs were split, then
		     this would have to remap those givs also.  See
		     find_splittable_regs.  */
#endif

		  splittable_regs[regno]
		    = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg),
					   giv_inc,
					   splittable_regs[src_regno]);
		  giv_inc = splittable_regs[regno];

		  /* Now split the induction variable by changing the dest
		     of this insn to a new register, and setting its
		     reg_map entry to point to this new register.

		     If this is the last iteration, and this is the last insn
		     that will update the iv, then reuse the original dest,
		     to ensure that the iv will have the proper value when
		     the loop exits or repeats.

		     Using splittable_regs_updates here like this is safe,
		     because it can only be greater than one if all
		     instructions modifying the iv are always executed in
		     order.  */

		  if (! last_iteration
		      || (splittable_regs_updates[regno]-- != 1))
		    {
		      tem = gen_reg_rtx (GET_MODE (giv_src_reg));
		      giv_dest_reg = tem;
		      map->reg_map[regno] = tem;
		      record_base_value (REGNO (tem),
					 giv_inc == const0_rtx
					 ? giv_src_reg
					 : gen_rtx_PLUS (GET_MODE (giv_src_reg),
							 giv_src_reg, giv_inc),
					 1);
		    }
		  else
		    map->reg_map[regno] = giv_src_reg;
		}

	      /* The constant being added could be too large for an add
		 immediate, so can't directly emit an insn here.  */
	      emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
	      copy = get_last_insn ();
	      pattern = PATTERN (copy);
	    }
	  else
	    {
	      pattern = copy_rtx_and_substitute (pattern, map, 0);
	      copy = emit_insn (pattern);
	    }
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);

	  /* If there is a REG_EQUAL note present whose value
	     is not loop invariant, then delete it, since it
	     may cause problems with later optimization passes.  */
	  if ((tem = find_reg_note (copy, REG_EQUAL, NULL_RTX))
	      && !loop_invariant_p (loop, XEXP (tem, 0)))
	    remove_note (copy, tem);

#ifdef HAVE_cc0
	  /* If this insn is setting CC0, it may need to look at
	     the insn that uses CC0 to see what type of insn it is.
	     In that case, the call to recog via validate_change will
	     fail.  So don't substitute constants here.  Instead,
	     do it when we emit the following insn.

	     For example, see the pyr.md file.  That machine has signed and
	     unsigned compares.  The compare patterns must check the
	     following branch insn to see what kind of compare to
	     emit.

	     If the previous insn set CC0, substitute constants on it as
	     well.  */
	  if (sets_cc0_p (PATTERN (copy)) != 0)
	    cc0_insn = copy;
	  else
	    {
	      if (cc0_insn)
		try_constants (cc0_insn, map);
	      cc0_insn = 0;
	      try_constants (copy, map);
	    }
#else
	  try_constants (copy, map);
#endif

	  /* Make split induction variable constants `permanent' since we
	     know there are no backward branches across iteration variable
	     settings which would invalidate this.  */
	  if (dest_reg_was_split)
	    {
	      int regno = REGNO (SET_DEST (set));

	      if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray)
		  && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age
		      == map->const_age))
		VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1;
	    }
	  break;

	case JUMP_INSN:
	  pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
	  copy = emit_jump_insn (pattern);
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);

	  if (JUMP_LABEL (insn))
	    {
	      JUMP_LABEL (copy) = get_label_from_map (map,
						      CODE_LABEL_NUMBER
						      (JUMP_LABEL (insn)));
	      LABEL_NUSES (JUMP_LABEL (copy))++;
	    }
	  if (JUMP_LABEL (insn) == start_label && insn == copy_end
	      && ! last_iteration)
	    {

	      /* This is a branch to the beginning of the loop; this is the
		 last insn being copied; and this is not the last iteration.
		 In this case, we want to change the original fall through
		 case to be a branch past the end of the loop, and the
		 original jump label case to fall_through.  */

	      if (!invert_jump (copy, exit_label, 0))
		{
		  rtx jmp;
		  rtx lab = gen_label_rtx ();
		  /* Can't do it by reversing the jump (probably because we
		     couldn't reverse the conditions), so emit a new
		     jump_insn after COPY, and redirect the jump around
		     that.  */
		  jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
		  JUMP_LABEL (jmp) = exit_label;
		  LABEL_NUSES (exit_label)++;
		  jmp = emit_barrier_after (jmp);
		  emit_label_after (lab, jmp);
		  LABEL_NUSES (lab) = 0;
		  if (!redirect_jump (copy, lab, 0))
		    abort ();
		}
	    }

#ifdef HAVE_cc0
	  if (cc0_insn)
	    try_constants (cc0_insn, map);
	  cc0_insn = 0;
#endif
	  try_constants (copy, map);

	  /* Set the jump label of COPY correctly to avoid problems with
	     later passes of unroll_loop, if INSN had jump label set.  */
	  if (JUMP_LABEL (insn))
	    {
	      rtx label = 0;

	      /* Can't use the label_map for every insn, since this may be
		 the backward branch, and hence the label was not mapped.  */
	      if ((set = single_set (copy)))
		{
		  tem = SET_SRC (set);
		  if (GET_CODE (tem) == LABEL_REF)
		    label = XEXP (tem, 0);
		  else if (GET_CODE (tem) == IF_THEN_ELSE)
		    {
		      if (XEXP (tem, 1) != pc_rtx)
			label = XEXP (XEXP (tem, 1), 0);
		      else
			label = XEXP (XEXP (tem, 2), 0);
		    }
		}

	      if (label && GET_CODE (label) == CODE_LABEL)
		JUMP_LABEL (copy) = label;
	      else
		{
		  /* An unrecognizable jump insn, probably the entry jump
		     for a switch statement.  This label must have been mapped,
		     so just use the label_map to get the new jump label.  */
		  JUMP_LABEL (copy)
		    = get_label_from_map (map,
					  CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
		}

	      /* If this is a non-local jump, then must increase the label
		 use count so that the label will not be deleted when the
		 original jump is deleted.  */
	      LABEL_NUSES (JUMP_LABEL (copy))++;
	    }
	  else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
		   || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
	    {
	      rtx pat = PATTERN (copy);
	      int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
	      int len = XVECLEN (pat, diff_vec_p);
	      int i;

	      for (i = 0; i < len; i++)
		LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
	    }

	  /* If this used to be a conditional jump insn but whose branch
	     direction is now known, we must do something special.  */
	  if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value)
	    {
#ifdef HAVE_cc0
	      /* If the previous insn set cc0 for us, delete it.  */
	      if (only_sets_cc0_p (PREV_INSN (copy)))
		delete_related_insns (PREV_INSN (copy));
#endif

	      /* If this is now a no-op, delete it.  */
	      if (map->last_pc_value == pc_rtx)
		{
		  delete_insn (copy);
		  copy = 0;
		}
	      else
		/* Otherwise, this is unconditional jump so we must put a
		   BARRIER after it.  We could do some dead code elimination
		   here, but jump.c will do it just as well.  */
		emit_barrier ();
	    }
	  break;

	case CALL_INSN:
	  pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
	  copy = emit_call_insn (pattern);
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);
	  SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn);
	  CONST_OR_PURE_CALL_P (copy) = CONST_OR_PURE_CALL_P (insn);

	  /* Because the USAGE information potentially contains objects other
	     than hard registers, we need to copy it.  */
	  CALL_INSN_FUNCTION_USAGE (copy)
	    = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn),
				       map, 0);

#ifdef HAVE_cc0
	  if (cc0_insn)
	    try_constants (cc0_insn, map);
	  cc0_insn = 0;
#endif
	  try_constants (copy, map);

	  /* Be lazy and assume CALL_INSNs clobber all hard registers.  */
	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	    VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0;
	  break;

	case CODE_LABEL:
	  /* If this is the loop start label, then we don't need to emit a
	     copy of this label since no one will use it.  */

	  if (insn != start_label)
	    {
	      copy = emit_label (get_label_from_map (map,
						     CODE_LABEL_NUMBER (insn)));
	      map->const_age++;
	    }
	  break;

	case BARRIER:
	  copy = emit_barrier ();
	  break;

	case NOTE:
	  /* VTOP and CONT notes are valid only before the loop exit test.
	     If placed anywhere else, loop may generate bad code.  */
	  /* BASIC_BLOCK notes exist to stabilize basic block structures with
	     the associated rtl.  We do not want to share the structure in
	     this new block.  */

	  if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
	      && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
		   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
		  || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
	    copy = emit_note (NOTE_SOURCE_FILE (insn),
			      NOTE_LINE_NUMBER (insn));
	  else
	    copy = 0;
	  break;

	default:
	  abort ();
	}

      map->insn_map[INSN_UID (insn)] = copy;
    }
  while (insn != copy_end);

  /* Now finish copying the REG_NOTES.  */
  insn = copy_start;
  do
    {
      insn = NEXT_INSN (insn);
      if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
	   || GET_CODE (insn) == CALL_INSN)
	  && map->insn_map[INSN_UID (insn)])
	final_reg_note_copy (&REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
    }
  while (insn != copy_end);

  /* There may be notes between copy_notes_from and loop_end.  Emit a copy of
     each of these notes here, since there may be some important ones, such as
     NOTE_INSN_BLOCK_END notes, in this group.  We don't do this on the last
     iteration, because the original notes won't be deleted.

     We can't use insert_before here, because when from preconditioning,
     insert_before points before the loop.  We can't use copy_end, because
     there may be insns already inserted after it (which we don't want to
     copy) when not from preconditioning code.  */

  if (! last_iteration)
    {
      for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
	{
	  /* VTOP notes are valid only before the loop exit test.
	     If placed anywhere else, loop may generate bad code.
	     Although COPY_NOTES_FROM will be at most one or two (for cc0)
	     instructions before the last insn in the loop, COPY_NOTES_FROM
	     can be a NOTE_INSN_LOOP_CONT note if there is no VTOP note,
	     as in a do .. while loop.  */
	  if (GET_CODE (insn) == NOTE
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
	    emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
	}
    }

  /* Emit the substitute start label only if some copied jump referenced
     it (see the final_label mapping at the top of this function).  */
  if (final_label && LABEL_NUSES (final_label) > 0)
    emit_label (final_label);

  tem = get_insns ();
  end_sequence ();
  loop_insn_emit_before (loop, 0, insert_before, tem);
}

/* Emit an insn, using the expand_binop to ensure that a valid insn is
   emitted.  This will correctly handle the case where the increment value
   won't fit in the immediate field of a PLUS insns.
*/ 2347 2348 void 2349 emit_unrolled_add (dest_reg, src_reg, increment) 2350 rtx dest_reg, src_reg, increment; 2351 { 2352 rtx result; 2353 2354 result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment, 2355 dest_reg, 0, OPTAB_LIB_WIDEN); 2356 2357 if (dest_reg != result) 2358 emit_move_insn (dest_reg, result); 2359 } 2360 2361 /* Searches the insns between INSN and LOOP->END. Returns 1 if there 2362 is a backward branch in that range that branches to somewhere between 2363 LOOP->START and INSN. Returns 0 otherwise. */ 2364 2365 /* ??? This is quadratic algorithm. Could be rewritten to be linear. 2366 In practice, this is not a problem, because this function is seldom called, 2367 and uses a negligible amount of CPU time on average. */ 2368 2369 int 2370 back_branch_in_range_p (loop, insn) 2371 const struct loop *loop; 2372 rtx insn; 2373 { 2374 rtx p, q, target_insn; 2375 rtx loop_start = loop->start; 2376 rtx loop_end = loop->end; 2377 rtx orig_loop_end = loop->end; 2378 2379 /* Stop before we get to the backward branch at the end of the loop. */ 2380 loop_end = prev_nonnote_insn (loop_end); 2381 if (GET_CODE (loop_end) == BARRIER) 2382 loop_end = PREV_INSN (loop_end); 2383 2384 /* Check in case insn has been deleted, search forward for first non 2385 deleted insn following it. */ 2386 while (INSN_DELETED_P (insn)) 2387 insn = NEXT_INSN (insn); 2388 2389 /* Check for the case where insn is the last insn in the loop. Deal 2390 with the case where INSN was a deleted loop test insn, in which case 2391 it will now be the NOTE_LOOP_END. */ 2392 if (insn == loop_end || insn == orig_loop_end) 2393 return 0; 2394 2395 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p)) 2396 { 2397 if (GET_CODE (p) == JUMP_INSN) 2398 { 2399 target_insn = JUMP_LABEL (p); 2400 2401 /* Search from loop_start to insn, to see if one of them is 2402 the target_insn. We can't use INSN_LUID comparisons here, 2403 since insn may not have an LUID entry. 
*/ 2404 for (q = loop_start; q != insn; q = NEXT_INSN (q)) 2405 if (q == target_insn) 2406 return 1; 2407 } 2408 } 2409 2410 return 0; 2411 } 2412 2413 /* Try to generate the simplest rtx for the expression 2414 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial 2415 value of giv's. */ 2416 2417 static rtx 2418 fold_rtx_mult_add (mult1, mult2, add1, mode) 2419 rtx mult1, mult2, add1; 2420 enum machine_mode mode; 2421 { 2422 rtx temp, mult_res; 2423 rtx result; 2424 2425 /* The modes must all be the same. This should always be true. For now, 2426 check to make sure. */ 2427 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode) 2428 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode) 2429 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode)) 2430 abort (); 2431 2432 /* Ensure that if at least one of mult1/mult2 are constant, then mult2 2433 will be a constant. */ 2434 if (GET_CODE (mult1) == CONST_INT) 2435 { 2436 temp = mult2; 2437 mult2 = mult1; 2438 mult1 = temp; 2439 } 2440 2441 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2); 2442 if (! mult_res) 2443 mult_res = gen_rtx_MULT (mode, mult1, mult2); 2444 2445 /* Again, put the constant second. */ 2446 if (GET_CODE (add1) == CONST_INT) 2447 { 2448 temp = add1; 2449 add1 = mult_res; 2450 mult_res = temp; 2451 } 2452 2453 result = simplify_binary_operation (PLUS, mode, add1, mult_res); 2454 if (! result) 2455 result = gen_rtx_PLUS (mode, add1, mult_res); 2456 2457 return result; 2458 } 2459 2460 /* Searches the list of induction struct's for the biv BL, to try to calculate 2461 the total increment value for one iteration of the loop as a constant. 2462 2463 Returns the increment value as an rtx, simplified as much as possible, 2464 if it can be calculated. Otherwise, returns 0. 
*/ 2465 2466 rtx 2467 biv_total_increment (bl) 2468 const struct iv_class *bl; 2469 { 2470 struct induction *v; 2471 rtx result; 2472 2473 /* For increment, must check every instruction that sets it. Each 2474 instruction must be executed only once each time through the loop. 2475 To verify this, we check that the insn is always executed, and that 2476 there are no backward branches after the insn that branch to before it. 2477 Also, the insn must have a mult_val of one (to make sure it really is 2478 an increment). */ 2479 2480 result = const0_rtx; 2481 for (v = bl->biv; v; v = v->next_iv) 2482 { 2483 if (v->always_computable && v->mult_val == const1_rtx 2484 && ! v->maybe_multiple 2485 && SCALAR_INT_MODE_P (v->mode)) 2486 { 2487 /* If we have already counted it, skip it. */ 2488 if (v->same) 2489 continue; 2490 2491 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode); 2492 } 2493 else 2494 return 0; 2495 } 2496 2497 return result; 2498 } 2499 2500 /* For each biv and giv, determine whether it can be safely split into 2501 a different variable for each unrolled copy of the loop body. If it 2502 is safe to split, then indicate that by saving some useful info 2503 in the splittable_regs array. 2504 2505 If the loop is being completely unrolled, then splittable_regs will hold 2506 the current value of the induction variable while the loop is unrolled. 2507 It must be set to the initial value of the induction variable here. 2508 Otherwise, splittable_regs will hold the difference between the current 2509 value of the induction variable and the value the induction variable had 2510 at the top of the loop. It must be set to the value 0 here. 2511 2512 Returns the total number of instructions that set registers that are 2513 splittable. */ 2514 2515 /* ?? If the loop is only unrolled twice, then most of the restrictions to 2516 constant values are unnecessary, since we can easily calculate increment 2517 values in this case even if nothing is constant. 
The increment value
   should not involve a multiply however.  */

/* ?? Even if the biv/giv increment values aren't constant, it may still
   be beneficial to split the variable if the loop is only unrolled a few
   times, since multiplies by small integers (1,2,3,4) are very cheap.  */

/* UNROLL_TYPE selects the kind of unrolling being done; UNROLL_NUMBER is
   the number of loop-body copies, passed through to find_splittable_givs.
   See the comment block above for the splittable_regs protocol.  */

static int
find_splittable_regs (loop, unroll_type, unroll_number)
     const struct loop *loop;
     enum unroll_types unroll_type;
     int unroll_number;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  struct induction *v;
  rtx increment, tem;
  rtx biv_final_value;
  int biv_splittable;
  int result = 0;

  for (bl = ivs->list; bl; bl = bl->next)
    {
      /* Biv_total_increment must return a constant value,
	 otherwise we can not calculate the split values.  */

      increment = biv_total_increment (bl);
      if (! increment || GET_CODE (increment) != CONST_INT)
	continue;

      /* The loop must be unrolled completely, or else have a known number
	 of iterations and only one exit, or else the biv must be dead
	 outside the loop, or else the final value must be known.  Otherwise,
	 it is unsafe to split the biv since it may not have the proper
	 value on loop exit.  */

      /* loop_number_exit_count is nonzero if the loop has an exit other than
	 a fall through at the end.  */

      biv_splittable = 1;
      biv_final_value = 0;
      if (unroll_type != UNROLL_COMPLETELY
	  && (loop->exit_count || unroll_type == UNROLL_NAIVE)
	  && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end)
	      || ! bl->init_insn
	      || INSN_UID (bl->init_insn) >= max_uid_for_loop
	      || (REGNO_FIRST_LUID (bl->regno)
		  < INSN_LUID (bl->init_insn))
	      || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
	  && ! (biv_final_value = final_biv_value (loop, bl)))
	biv_splittable = 0;

      /* If any of the insns setting the BIV don't do so with a simple
	 PLUS, we don't know how to split it.  */
      for (v = bl->biv; biv_splittable && v; v = v->next_iv)
	if ((tem = single_set (v->insn)) == 0
	    || GET_CODE (SET_DEST (tem)) != REG
	    || REGNO (SET_DEST (tem)) != bl->regno
	    || GET_CODE (SET_SRC (tem)) != PLUS)
	  biv_splittable = 0;

      /* If final value is nonzero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */

      /* This biv is splittable.  If completely unrolling the loop, save
	 the biv's initial value.  Otherwise, save the constant zero.  */

      if (biv_splittable == 1)
	{
	  if (unroll_type == UNROLL_COMPLETELY)
	    {
	      /* If the initial value of the biv is itself (i.e. it is too
		 complicated for strength_reduce to compute), or is a hard
		 register, or it isn't invariant, then we must create a new
		 pseudo reg to hold the initial value of the biv.  */

	      if (GET_CODE (bl->initial_value) == REG
		  && (REGNO (bl->initial_value) == bl->regno
		      || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
		      || ! loop_invariant_p (loop, bl->initial_value)))
		{
		  rtx tem = gen_reg_rtx (bl->biv->mode);

		  record_base_value (REGNO (tem), bl->biv->add_val, 0);
		  loop_insn_hoist (loop,
				   gen_move_insn (tem, bl->biv->src_reg));

		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "Biv %d initial value remapped to %d.\n",
			     bl->regno, REGNO (tem));

		  splittable_regs[bl->regno] = tem;
		}
	      else
		splittable_regs[bl->regno] = bl->initial_value;
	    }
	  else
	    splittable_regs[bl->regno] = const0_rtx;

	  /* Save the number of instructions that modify the biv, so that
	     we can treat the last one specially.  */

	  splittable_regs_updates[bl->regno] = bl->biv_count;
	  result += bl->biv_count;

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Biv %d safe to split.\n", bl->regno);
	}

      /* Check every giv that depends on this biv to see whether it is
	 splittable also.  Even if the biv isn't splittable, givs which
	 depend on it may be splittable if the biv is live outside the
	 loop, and the givs aren't.  */

      result += find_splittable_givs (loop, bl, unroll_type, increment,
				      unroll_number);

      /* If final value is nonzero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */
      if (biv_final_value)
	{
	  /* If the loop has multiple exits, emit the insns before the
	     loop to ensure that it will always be executed no matter
	     how the loop exits.  Otherwise emit the insn after the loop,
	     since this is slightly more efficient.  */
	  if (! loop->exit_count)
	    loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg,
						 biv_final_value));
	  else
	    {
	      /* Create a new register to hold the value of the biv, and then
		 set the biv to its final value before the loop start.  The biv
		 is set to its final value before loop start to ensure that
		 this insn will always be executed, no matter how the loop
		 exits.  */
	      rtx tem = gen_reg_rtx (bl->biv->mode);
	      record_base_value (REGNO (tem), bl->biv->add_val, 0);

	      loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
	      loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg,
						    biv_final_value));

	      if (loop_dump_stream)
		fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
			 REGNO (bl->biv->src_reg), REGNO (tem));

	      /* Set up the mapping from the original biv register to the new
		 register.  */
	      bl->biv->src_reg = tem;
	    }
	}
    }
  return result;
}

/* For every giv based on the biv BL, check to determine whether it is
   splittable.  This is a subroutine to find_splittable_regs ().

   Return the number of instructions that set splittable registers.  */

static int
find_splittable_givs (loop, bl, unroll_type, increment, unroll_number)
     const struct loop *loop;
     struct iv_class *bl;
     enum unroll_types unroll_type;
     rtx increment;
     int unroll_number ATTRIBUTE_UNUSED;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct induction *v, *v2;
  rtx final_value;
  rtx tem;
  int result = 0;

  /* Scan the list of givs, and set the same_insn field when there are
     multiple identical givs in the same insn.  */
  for (v = bl->giv; v; v = v->next_iv)
    for (v2 = v->next_iv; v2; v2 = v2->next_iv)
      if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
	  && !
v2->same_insn) 2703 v2->same_insn = v; 2704 2705 for (v = bl->giv; v; v = v->next_iv) 2706 { 2707 rtx giv_inc, value; 2708 2709 /* Only split the giv if it has already been reduced, or if the loop is 2710 being completely unrolled. */ 2711 if (unroll_type != UNROLL_COMPLETELY && v->ignore) 2712 continue; 2713 2714 /* The giv can be split if the insn that sets the giv is executed once 2715 and only once on every iteration of the loop. */ 2716 /* An address giv can always be split. v->insn is just a use not a set, 2717 and hence it does not matter whether it is always executed. All that 2718 matters is that all the biv increments are always executed, and we 2719 won't reach here if they aren't. */ 2720 if (v->giv_type != DEST_ADDR 2721 && (! v->always_computable 2722 || back_branch_in_range_p (loop, v->insn))) 2723 continue; 2724 2725 /* The giv increment value must be a constant. */ 2726 giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx, 2727 v->mode); 2728 if (! giv_inc || GET_CODE (giv_inc) != CONST_INT) 2729 continue; 2730 2731 /* The loop must be unrolled completely, or else have a known number of 2732 iterations and only one exit, or else the giv must be dead outside 2733 the loop, or else the final value of the giv must be known. 2734 Otherwise, it is not safe to split the giv since it may not have the 2735 proper value on loop exit. */ 2736 2737 /* The used outside loop test will fail for DEST_ADDR givs. They are 2738 never used outside the loop anyways, so it is always safe to split a 2739 DEST_ADDR giv. */ 2740 2741 final_value = 0; 2742 if (unroll_type != UNROLL_COMPLETELY 2743 && (loop->exit_count || unroll_type == UNROLL_NAIVE) 2744 && v->giv_type != DEST_ADDR 2745 /* The next part is true if the pseudo is used outside the loop. 2746 We assume that this is true for any pseudo created after loop 2747 starts, because we don't have a reg_n_info entry for them. 
*/ 2748 && (REGNO (v->dest_reg) >= max_reg_before_loop 2749 || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn) 2750 /* Check for the case where the pseudo is set by a shift/add 2751 sequence, in which case the first insn setting the pseudo 2752 is the first insn of the shift/add sequence. */ 2753 && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX)) 2754 || (REGNO_FIRST_UID (REGNO (v->dest_reg)) 2755 != INSN_UID (XEXP (tem, 0))))) 2756 /* Line above always fails if INSN was moved by loop opt. */ 2757 || (REGNO_LAST_LUID (REGNO (v->dest_reg)) 2758 >= INSN_LUID (loop->end))) 2759 && ! (final_value = v->final_value)) 2760 continue; 2761 2762 #if 0 2763 /* Currently, non-reduced/final-value givs are never split. */ 2764 /* Should emit insns after the loop if possible, as the biv final value 2765 code below does. */ 2766 2767 /* If the final value is nonzero, and the giv has not been reduced, 2768 then must emit an instruction to set the final value. */ 2769 if (final_value && !v->new_reg) 2770 { 2771 /* Create a new register to hold the value of the giv, and then set 2772 the giv to its final value before the loop start. The giv is set 2773 to its final value before loop start to ensure that this insn 2774 will always be executed, no matter how we exit. */ 2775 tem = gen_reg_rtx (v->mode); 2776 loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg)); 2777 loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value)); 2778 2779 if (loop_dump_stream) 2780 fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n", 2781 REGNO (v->dest_reg), REGNO (tem)); 2782 2783 v->src_reg = tem; 2784 } 2785 #endif 2786 2787 /* This giv is splittable. If completely unrolling the loop, save the 2788 giv's initial value. Otherwise, save the constant zero for it. */ 2789 2790 if (unroll_type == UNROLL_COMPLETELY) 2791 { 2792 /* It is not safe to use bl->initial_value here, because it may not 2793 be invariant. 
It is safe to use the initial value stored in 2794 the splittable_regs array if it is set. In rare cases, it won't 2795 be set, so then we do exactly the same thing as 2796 find_splittable_regs does to get a safe value. */ 2797 rtx biv_initial_value; 2798 2799 if (splittable_regs[bl->regno]) 2800 biv_initial_value = splittable_regs[bl->regno]; 2801 else if (GET_CODE (bl->initial_value) != REG 2802 || (REGNO (bl->initial_value) != bl->regno 2803 && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER)) 2804 biv_initial_value = bl->initial_value; 2805 else 2806 { 2807 rtx tem = gen_reg_rtx (bl->biv->mode); 2808 2809 record_base_value (REGNO (tem), bl->biv->add_val, 0); 2810 loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg)); 2811 biv_initial_value = tem; 2812 } 2813 biv_initial_value = extend_value_for_giv (v, biv_initial_value); 2814 value = fold_rtx_mult_add (v->mult_val, biv_initial_value, 2815 v->add_val, v->mode); 2816 } 2817 else 2818 value = const0_rtx; 2819 2820 if (v->new_reg) 2821 { 2822 /* If a giv was combined with another giv, then we can only split 2823 this giv if the giv it was combined with was reduced. This 2824 is because the value of v->new_reg is meaningless in this 2825 case. */ 2826 if (v->same && ! v->same->new_reg) 2827 { 2828 if (loop_dump_stream) 2829 fprintf (loop_dump_stream, 2830 "giv combined with unreduced giv not split.\n"); 2831 continue; 2832 } 2833 /* If the giv is an address destination, it could be something other 2834 than a simple register, these have to be treated differently. */ 2835 else if (v->giv_type == DEST_REG) 2836 { 2837 /* If value is not a constant, register, or register plus 2838 constant, then compute its value into a register before 2839 loop start. This prevents invalid rtx sharing, and should 2840 generate better code. We can use bl->initial_value here 2841 instead of splittable_regs[bl->regno] because this code 2842 is going before the loop start. 
*/ 2843 if (unroll_type == UNROLL_COMPLETELY 2844 && GET_CODE (value) != CONST_INT 2845 && GET_CODE (value) != REG 2846 && (GET_CODE (value) != PLUS 2847 || GET_CODE (XEXP (value, 0)) != REG 2848 || GET_CODE (XEXP (value, 1)) != CONST_INT)) 2849 { 2850 rtx tem = gen_reg_rtx (v->mode); 2851 record_base_value (REGNO (tem), v->add_val, 0); 2852 loop_iv_add_mult_hoist (loop, 2853 extend_value_for_giv (v, bl->initial_value), 2854 v->mult_val, v->add_val, tem); 2855 value = tem; 2856 } 2857 2858 splittable_regs[reg_or_subregno (v->new_reg)] = value; 2859 } 2860 else 2861 continue; 2862 } 2863 else 2864 { 2865 #if 0 2866 /* Currently, unreduced giv's can't be split. This is not too much 2867 of a problem since unreduced giv's are not live across loop 2868 iterations anyways. When unrolling a loop completely though, 2869 it makes sense to reduce&split givs when possible, as this will 2870 result in simpler instructions, and will not require that a reg 2871 be live across loop iterations. */ 2872 2873 splittable_regs[REGNO (v->dest_reg)] = value; 2874 fprintf (stderr, "Giv %d at insn %d not reduced\n", 2875 REGNO (v->dest_reg), INSN_UID (v->insn)); 2876 #else 2877 continue; 2878 #endif 2879 } 2880 2881 /* Unreduced givs are only updated once by definition. Reduced givs 2882 are updated as many times as their biv is. Mark it so if this is 2883 a splittable register. Don't need to do anything for address givs 2884 where this may not be a register. */ 2885 2886 if (GET_CODE (v->new_reg) == REG) 2887 { 2888 int count = 1; 2889 if (! 
v->ignore)
	    count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count;

	  splittable_regs_updates[reg_or_subregno (v->new_reg)] = count;
	}

      result++;

      if (loop_dump_stream)
	{
	  int regnum;

	  if (GET_CODE (v->dest_reg) == CONST_INT)
	    regnum = -1;
	  else if (GET_CODE (v->dest_reg) != REG)
	    regnum = REGNO (XEXP (v->dest_reg, 0));
	  else
	    regnum = REGNO (v->dest_reg);
	  fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
		   regnum, INSN_UID (v->insn));
	}
    }

  return result;
}

/* Try to prove that the register is dead after the loop exits.  Trace every
   loop exit looking for an insn that will always be executed, which sets
   the register to some value, and appears before the first use of the register
   is found.  If successful, then return 1, otherwise return 0.  */

/* ?? Could be made more intelligent in the handling of jumps, so that
   it can search past if statements and other similar structures.  */

static int
reg_dead_after_loop (loop, reg)
     const struct loop *loop;
     rtx reg;
{
  rtx insn, label;
  enum rtx_code code;
  int jump_count = 0;
  int label_count = 0;

  /* In addition to checking all exits of this loop, we must also check
     all exits of inner nested loops that would exit this loop.  We don't
     have any way to identify those, so we just give up if there are any
     such inner loop exits.  */

  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
    label_count++;

  if (label_count != loop->exit_count)
    return 0;

  /* HACK: Must also search the loop fall through exit, create a label_ref
     here which points to the loop->end, and append the loop_number_exit_labels
     list to it.  */
  label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
  LABEL_NEXTREF (label) = loop->exit_labels;

  for (; label; label = LABEL_NEXTREF (label))
    {
      /* Succeed if find an insn which sets the biv or if reach end of
	 function.  Fail if find an insn that uses the biv, or if come to
	 a conditional jump.  */

      insn = NEXT_INSN (XEXP (label, 0));
      while (insn)
	{
	  code = GET_CODE (insn);
	  /* Class 'i' covers the real instructions (INSN, JUMP_INSN,
	     CALL_INSN); notes and labels are skipped over.  */
	  if (GET_RTX_CLASS (code) == 'i')
	    {
	      rtx set, note;

	      /* Any read of REG before a full set means it is live here.  */
	      if (reg_referenced_p (reg, PATTERN (insn)))
		return 0;

	      /* A REG_EQUAL/REG_EQUIV note mentioning REG also counts as
		 a use.  */
	      note = find_reg_equal_equiv_note (insn);
	      if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
		return 0;

	      /* A full set of REG before any use kills it on this path.  */
	      set = single_set (insn);
	      if (set && rtx_equal_p (SET_DEST (set), reg))
		break;
	    }

	  if (code == JUMP_INSN)
	    {
	      if (GET_CODE (PATTERN (insn)) == RETURN)
		break;
	      else if (!any_uncondjump_p (insn)
		       /* Prevent infinite loop following infinite loops.  */
		       || jump_count++ > 20)
		return 0;
	      else
		/* Follow the unconditional jump and keep scanning from
		   its target.  */
		insn = JUMP_LABEL (insn);
	    }

	  insn = NEXT_INSN (insn);
	}
    }

  /* Success, the register is dead on all loop exits.  */
  return 1;
}

/* Try to calculate the final value of the biv, the value it will have at
   the end of the loop.  If we can do it, return that value.  */

rtx
final_biv_value (loop, bl)
     const struct loop *loop;
     struct iv_class *bl;
{
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
  rtx increment, tem;

  /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */

  if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
    return 0;

  /* The final value for reversed bivs must be calculated differently than
     for ordinary bivs.  In this case, there is already an insn after the
     loop which sets this biv's final value (if necessary), and there are
     no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, reversed biv.\n", bl->regno);

      return const0_rtx;
    }

  /* Try to calculate the final value as initial value + (number of iterations
     * increment).  For this to work, increment must be invariant, the only
     exit from the loop must be the fall through at the bottom (otherwise
     it may not have its final value when the loop exits), and the initial
     value of the biv must be invariant.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && loop_invariant_p (loop, bl->initial_value))
    {
      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment))
	{
	  /* Can calculate the loop exit value, emit insns after loop
	     end to calculate this value into a temporary register in
	     case it is needed later.  */

	  tem = gen_reg_rtx (bl->biv->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  /* Emit tem = initial_value + increment * n_iterations after
	     the loop end.  */
	  loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
				 bl->initial_value, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final biv value for %d, calculated.\n", bl->regno);

	  return tem;
	}
    }

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, bl->biv->src_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, biv dead after loop exit.\n",
		 bl->regno);

      return const0_rtx;
    }

  return 0;
}

/* Try to calculate the final value of the giv, the value it will have at
   the end of the loop.  If we can do it, return that value.
 */

rtx
final_giv_value (loop, v)
     const struct loop *loop;
     struct induction *v;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  rtx insn;
  rtx increment, tem;
  rtx seq;
  rtx loop_end = loop->end;
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;

  /* The giv's final value is derived from the biv class it is based on.  */
  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

  /* The final value for givs which depend on reversed bivs must be calculated
     differently than for ordinary givs.  In this case, there is already an
     insn after the loop which sets this giv's final value (if necessary),
     and there are no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, depends on reversed biv\n",
		 REGNO (v->dest_reg));
      return const0_rtx;
    }

  /* Try to calculate the final value as a function of the biv it depends
     upon.  The only exit from the loop must be the fall through at the bottom
     and the insn that sets the giv must be executed on every iteration
     (otherwise the giv may not have its final value when the loop exits).  */

  /* ??? Can calculate the final giv value by subtracting off the
     extra biv increments times the giv's mult_val.  The loop must have
     only one exit for this to work, but the loop iterations does not need
     to be known.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && v->always_executed)
    {
      /* ?? It is tempting to use the biv's value here since these insns will
	 be put after the loop, and hence the biv will have its final value
	 then.  However, this fails if the biv is subsequently eliminated.
	 Perhaps determine whether biv's are eliminable before trying to
	 determine whether giv's are replaceable so that we can use the
	 biv value here if it is not eliminable.  */

      /* We are emitting code after the end of the loop, so we must make
	 sure that bl->initial_value is still valid then.  It will still
	 be valid if it is invariant.  */

      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment)
	  && loop_invariant_p (loop, bl->initial_value))
	{
	  /* Can calculate the loop exit value of its biv as
	     (n_iterations * increment) + initial_value */

	  /* The loop exit value of the giv is then
	     (final_biv_value - extra increments) * mult_val + add_val.
	     The extra increments are any increments to the biv which
	     occur in the loop after the giv's value is calculated.
	     We must search from the insn that sets the giv to the end
	     of the loop to calculate this value.  */

	  /* Put the final biv value in tem.  */
	  tem = gen_reg_rtx (v->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
				 GEN_INT (n_iterations),
				 extend_value_for_giv (v, bl->initial_value),
				 tem);

	  /* Subtract off extra increments as we find them.  */
	  for (insn = NEXT_INSN (v->insn); insn != loop_end;
	       insn = NEXT_INSN (insn))
	    {
	      struct induction *biv;

	      for (biv = bl->biv; biv; biv = biv->next_iv)
		if (biv->insn == insn)
		  {
		    /* Emit tem = tem - add_val after the loop for each
		       biv increment that follows the giv's set.  */
		    start_sequence ();
		    tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
					       biv->add_val, NULL_RTX, 0,
					       OPTAB_LIB_WIDEN);
		    seq = get_insns ();
		    end_sequence ();
		    loop_insn_sink (loop, seq);
		  }
	    }

	  /* Now calculate the giv's final value.  */
	  loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final giv value for %d, calc from biv's value.\n",
		     REGNO (v->dest_reg));

	  return tem;
	}
    }

  /* Replaceable giv's should never reach here.  */
  if (v->replaceable)
    abort ();

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, v->dest_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, giv dead after loop exit.\n",
		 REGNO (v->dest_reg));

      return const0_rtx;
    }

  return 0;
}

/* Look back before LOOP->START for the insn that sets REG and return
   the equivalent constant if there is a REG_EQUAL note otherwise just
   the SET_SRC of REG.  */

static rtx
loop_find_equiv_value (loop, reg)
     const struct loop *loop;
     rtx reg;
{
  rtx loop_start = loop->start;
  rtx insn, set;
  rtx ret;

  ret = reg;
  for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
    {
      /* Stop at a CODE_LABEL: control may join here from elsewhere, so
	 an earlier set is not guaranteed to reach the loop entry.  */
      if (GET_CODE (insn) == CODE_LABEL)
	break;

      else if (INSN_P (insn) && reg_set_p (reg, insn))
	{
	  /* We found the last insn before the loop that sets the register.
	     If it sets the entire register, and has a REG_EQUAL note,
	     then use the value of the REG_EQUAL note.  */
	  if ((set = single_set (insn))
	      && (SET_DEST (set) == reg))
	    {
	      rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);

	      /* Only use the REG_EQUAL note if it is a constant.
		 Other things, divide in particular, will cause
		 problems later if we use them.  */
	      if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
		  && CONSTANT_P (XEXP (note, 0)))
		ret = XEXP (note, 0);
	      else
		ret = SET_SRC (set);

	      /* We cannot do this if it changes between the
		 assignment and loop start though.  */
	      if (modified_between_p (ret, insn, loop_start))
		ret = reg;
	    }
	  break;
	}
    }
  return ret;
}

/* Return a simplified rtx for the expression OP - REG.

   REG must appear in OP, and OP must be a register or the sum of a register
   and a second term.
3251 3252 Thus, the return value must be const0_rtx or the second term. 3253 3254 The caller is responsible for verifying that REG appears in OP and OP has 3255 the proper form. */ 3256 3257 static rtx 3258 subtract_reg_term (op, reg) 3259 rtx op, reg; 3260 { 3261 if (op == reg) 3262 return const0_rtx; 3263 if (GET_CODE (op) == PLUS) 3264 { 3265 if (XEXP (op, 0) == reg) 3266 return XEXP (op, 1); 3267 else if (XEXP (op, 1) == reg) 3268 return XEXP (op, 0); 3269 } 3270 /* OP does not contain REG as a term. */ 3271 abort (); 3272 } 3273 3274 /* Find and return register term common to both expressions OP0 and 3275 OP1 or NULL_RTX if no such term exists. Each expression must be a 3276 REG or a PLUS of a REG. */ 3277 3278 static rtx 3279 find_common_reg_term (op0, op1) 3280 rtx op0, op1; 3281 { 3282 if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS) 3283 && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS)) 3284 { 3285 rtx op00; 3286 rtx op01; 3287 rtx op10; 3288 rtx op11; 3289 3290 if (GET_CODE (op0) == PLUS) 3291 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0); 3292 else 3293 op01 = const0_rtx, op00 = op0; 3294 3295 if (GET_CODE (op1) == PLUS) 3296 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0); 3297 else 3298 op11 = const0_rtx, op10 = op1; 3299 3300 /* Find and return common register term if present. */ 3301 if (REG_P (op00) && (op00 == op10 || op00 == op11)) 3302 return op00; 3303 else if (REG_P (op01) && (op01 == op10 || op01 == op11)) 3304 return op01; 3305 } 3306 3307 /* No common register term found. */ 3308 return NULL_RTX; 3309 } 3310 3311 /* Determine the loop iterator and calculate the number of loop 3312 iterations. Returns the exact number of loop iterations if it can 3313 be calculated, otherwise returns zero. 
*/ 3314 3315 unsigned HOST_WIDE_INT 3316 loop_iterations (loop) 3317 struct loop *loop; 3318 { 3319 struct loop_info *loop_info = LOOP_INFO (loop); 3320 struct loop_ivs *ivs = LOOP_IVS (loop); 3321 rtx comparison, comparison_value; 3322 rtx iteration_var, initial_value, increment, final_value; 3323 enum rtx_code comparison_code; 3324 HOST_WIDE_INT inc; 3325 unsigned HOST_WIDE_INT abs_inc; 3326 unsigned HOST_WIDE_INT abs_diff; 3327 int off_by_one; 3328 int increment_dir; 3329 int unsigned_p, compare_dir, final_larger; 3330 rtx last_loop_insn; 3331 rtx reg_term; 3332 struct iv_class *bl; 3333 3334 loop_info->n_iterations = 0; 3335 loop_info->initial_value = 0; 3336 loop_info->initial_equiv_value = 0; 3337 loop_info->comparison_value = 0; 3338 loop_info->final_value = 0; 3339 loop_info->final_equiv_value = 0; 3340 loop_info->increment = 0; 3341 loop_info->iteration_var = 0; 3342 loop_info->unroll_number = 1; 3343 loop_info->iv = 0; 3344 3345 /* We used to use prev_nonnote_insn here, but that fails because it might 3346 accidentally get the branch for a contained loop if the branch for this 3347 loop was deleted. We can only trust branches immediately before the 3348 loop_end. */ 3349 last_loop_insn = PREV_INSN (loop->end); 3350 3351 /* ??? We should probably try harder to find the jump insn 3352 at the end of the loop. The following code assumes that 3353 the last loop insn is a jump to the top of the loop. */ 3354 if (GET_CODE (last_loop_insn) != JUMP_INSN) 3355 { 3356 if (loop_dump_stream) 3357 fprintf (loop_dump_stream, 3358 "Loop iterations: No final conditional branch found.\n"); 3359 return 0; 3360 } 3361 3362 /* If there is a more than a single jump to the top of the loop 3363 we cannot (easily) determine the iteration count. 
*/ 3364 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1) 3365 { 3366 if (loop_dump_stream) 3367 fprintf (loop_dump_stream, 3368 "Loop iterations: Loop has multiple back edges.\n"); 3369 return 0; 3370 } 3371 3372 /* If there are multiple conditionalized loop exit tests, they may jump 3373 back to differing CODE_LABELs. */ 3374 if (loop->top && loop->cont) 3375 { 3376 rtx temp = PREV_INSN (last_loop_insn); 3377 3378 do 3379 { 3380 if (GET_CODE (temp) == JUMP_INSN) 3381 { 3382 /* There are some kinds of jumps we can't deal with easily. */ 3383 if (JUMP_LABEL (temp) == 0) 3384 { 3385 if (loop_dump_stream) 3386 fprintf 3387 (loop_dump_stream, 3388 "Loop iterations: Jump insn has null JUMP_LABEL.\n"); 3389 return 0; 3390 } 3391 3392 if (/* Previous unrolling may have generated new insns not 3393 covered by the uid_luid array. */ 3394 INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop 3395 /* Check if we jump back into the loop body. */ 3396 && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top) 3397 && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont)) 3398 { 3399 if (loop_dump_stream) 3400 fprintf 3401 (loop_dump_stream, 3402 "Loop iterations: Loop has multiple back edges.\n"); 3403 return 0; 3404 } 3405 } 3406 } 3407 while ((temp = PREV_INSN (temp)) != loop->cont); 3408 } 3409 3410 /* Find the iteration variable. If the last insn is a conditional 3411 branch, and the insn before tests a register value, make that the 3412 iteration variable. */ 3413 3414 comparison = get_condition_for_loop (loop, last_loop_insn); 3415 if (comparison == 0) 3416 { 3417 if (loop_dump_stream) 3418 fprintf (loop_dump_stream, 3419 "Loop iterations: No final comparison found.\n"); 3420 return 0; 3421 } 3422 3423 /* ??? Get_condition may switch position of induction variable and 3424 invariant register when it canonicalizes the comparison. 
*/ 3425 3426 comparison_code = GET_CODE (comparison); 3427 iteration_var = XEXP (comparison, 0); 3428 comparison_value = XEXP (comparison, 1); 3429 3430 if (GET_CODE (iteration_var) != REG) 3431 { 3432 if (loop_dump_stream) 3433 fprintf (loop_dump_stream, 3434 "Loop iterations: Comparison not against register.\n"); 3435 return 0; 3436 } 3437 3438 /* The only new registers that are created before loop iterations 3439 are givs made from biv increments or registers created by 3440 load_mems. In the latter case, it is possible that try_copy_prop 3441 will propagate a new pseudo into the old iteration register but 3442 this will be marked by having the REG_USERVAR_P bit set. */ 3443 3444 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs 3445 && ! REG_USERVAR_P (iteration_var)) 3446 abort (); 3447 3448 /* Determine the initial value of the iteration variable, and the amount 3449 that it is incremented each loop. Use the tables constructed by 3450 the strength reduction pass to calculate these values. */ 3451 3452 /* Clear the result values, in case no answer can be found. */ 3453 initial_value = 0; 3454 increment = 0; 3455 3456 /* The iteration variable can be either a giv or a biv. Check to see 3457 which it is, and compute the variable's initial value, and increment 3458 value if possible. */ 3459 3460 /* If this is a new register, can't handle it since we don't have any 3461 reg_iv_type entry for it. */ 3462 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs) 3463 { 3464 if (loop_dump_stream) 3465 fprintf (loop_dump_stream, 3466 "Loop iterations: No reg_iv_type entry for iteration var.\n"); 3467 return 0; 3468 } 3469 3470 /* Reject iteration variables larger than the host wide int size, since they 3471 could result in a number of iterations greater than the range of our 3472 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. 
*/ 3473 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var)) 3474 > HOST_BITS_PER_WIDE_INT)) 3475 { 3476 if (loop_dump_stream) 3477 fprintf (loop_dump_stream, 3478 "Loop iterations: Iteration var rejected because mode too large.\n"); 3479 return 0; 3480 } 3481 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT) 3482 { 3483 if (loop_dump_stream) 3484 fprintf (loop_dump_stream, 3485 "Loop iterations: Iteration var not an integer.\n"); 3486 return 0; 3487 } 3488 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT) 3489 { 3490 if (REGNO (iteration_var) >= ivs->n_regs) 3491 abort (); 3492 3493 /* Grab initial value, only useful if it is a constant. */ 3494 bl = REG_IV_CLASS (ivs, REGNO (iteration_var)); 3495 initial_value = bl->initial_value; 3496 if (!bl->biv->always_executed || bl->biv->maybe_multiple) 3497 { 3498 if (loop_dump_stream) 3499 fprintf (loop_dump_stream, 3500 "Loop iterations: Basic induction var not set once in each iteration.\n"); 3501 return 0; 3502 } 3503 3504 increment = biv_total_increment (bl); 3505 } 3506 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT) 3507 { 3508 HOST_WIDE_INT offset = 0; 3509 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var)); 3510 rtx biv_initial_value; 3511 3512 if (REGNO (v->src_reg) >= ivs->n_regs) 3513 abort (); 3514 3515 if (!v->always_executed || v->maybe_multiple) 3516 { 3517 if (loop_dump_stream) 3518 fprintf (loop_dump_stream, 3519 "Loop iterations: General induction var not set once in each iteration.\n"); 3520 return 0; 3521 } 3522 3523 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg)); 3524 3525 /* Increment value is mult_val times the increment value of the biv. 
*/ 3526 3527 increment = biv_total_increment (bl); 3528 if (increment) 3529 { 3530 struct induction *biv_inc; 3531 3532 increment = fold_rtx_mult_add (v->mult_val, 3533 extend_value_for_giv (v, increment), 3534 const0_rtx, v->mode); 3535 /* The caller assumes that one full increment has occurred at the 3536 first loop test. But that's not true when the biv is incremented 3537 after the giv is set (which is the usual case), e.g.: 3538 i = 6; do {;} while (i++ < 9) . 3539 Therefore, we bias the initial value by subtracting the amount of 3540 the increment that occurs between the giv set and the giv test. */ 3541 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv) 3542 { 3543 if (loop_insn_first_p (v->insn, biv_inc->insn)) 3544 { 3545 if (REG_P (biv_inc->add_val)) 3546 { 3547 if (loop_dump_stream) 3548 fprintf (loop_dump_stream, 3549 "Loop iterations: Basic induction var add_val is REG %d.\n", 3550 REGNO (biv_inc->add_val)); 3551 return 0; 3552 } 3553 3554 /* If we have already counted it, skip it. */ 3555 if (biv_inc->same) 3556 continue; 3557 3558 offset -= INTVAL (biv_inc->add_val); 3559 } 3560 } 3561 } 3562 if (loop_dump_stream) 3563 fprintf (loop_dump_stream, 3564 "Loop iterations: Giv iterator, initial value bias %ld.\n", 3565 (long) offset); 3566 3567 /* Initial value is mult_val times the biv's initial value plus 3568 add_val. Only useful if it is a constant. 
*/ 3569 biv_initial_value = extend_value_for_giv (v, bl->initial_value); 3570 initial_value 3571 = fold_rtx_mult_add (v->mult_val, 3572 plus_constant (biv_initial_value, offset), 3573 v->add_val, v->mode); 3574 } 3575 else 3576 { 3577 if (loop_dump_stream) 3578 fprintf (loop_dump_stream, 3579 "Loop iterations: Not basic or general induction var.\n"); 3580 return 0; 3581 } 3582 3583 if (initial_value == 0) 3584 return 0; 3585 3586 unsigned_p = 0; 3587 off_by_one = 0; 3588 switch (comparison_code) 3589 { 3590 case LEU: 3591 unsigned_p = 1; 3592 case LE: 3593 compare_dir = 1; 3594 off_by_one = 1; 3595 break; 3596 case GEU: 3597 unsigned_p = 1; 3598 case GE: 3599 compare_dir = -1; 3600 off_by_one = -1; 3601 break; 3602 case EQ: 3603 /* Cannot determine loop iterations with this case. */ 3604 compare_dir = 0; 3605 break; 3606 case LTU: 3607 unsigned_p = 1; 3608 case LT: 3609 compare_dir = 1; 3610 break; 3611 case GTU: 3612 unsigned_p = 1; 3613 case GT: 3614 compare_dir = -1; 3615 case NE: 3616 compare_dir = 0; 3617 break; 3618 default: 3619 abort (); 3620 } 3621 3622 /* If the comparison value is an invariant register, then try to find 3623 its value from the insns before the start of the loop. */ 3624 3625 final_value = comparison_value; 3626 if (GET_CODE (comparison_value) == REG 3627 && loop_invariant_p (loop, comparison_value)) 3628 { 3629 final_value = loop_find_equiv_value (loop, comparison_value); 3630 3631 /* If we don't get an invariant final value, we are better 3632 off with the original register. */ 3633 if (! loop_invariant_p (loop, final_value)) 3634 final_value = comparison_value; 3635 } 3636 3637 /* Calculate the approximate final value of the induction variable 3638 (on the last successful iteration). The exact final value 3639 depends on the branch operator, and increment sign. 
It will be 3640 wrong if the iteration variable is not incremented by one each 3641 time through the loop and (comparison_value + off_by_one - 3642 initial_value) % increment != 0. 3643 ??? Note that the final_value may overflow and thus final_larger 3644 will be bogus. A potentially infinite loop will be classified 3645 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */ 3646 if (off_by_one) 3647 final_value = plus_constant (final_value, off_by_one); 3648 3649 /* Save the calculated values describing this loop's bounds, in case 3650 precondition_loop_p will need them later. These values can not be 3651 recalculated inside precondition_loop_p because strength reduction 3652 optimizations may obscure the loop's structure. 3653 3654 These values are only required by precondition_loop_p and insert_bct 3655 whenever the number of iterations cannot be computed at compile time. 3656 Only the difference between final_value and initial_value is 3657 important. Note that final_value is only approximate. */ 3658 loop_info->initial_value = initial_value; 3659 loop_info->comparison_value = comparison_value; 3660 loop_info->final_value = plus_constant (comparison_value, off_by_one); 3661 loop_info->increment = increment; 3662 loop_info->iteration_var = iteration_var; 3663 loop_info->comparison_code = comparison_code; 3664 loop_info->iv = bl; 3665 3666 /* Try to determine the iteration count for loops such 3667 as (for i = init; i < init + const; i++). When running the 3668 loop optimization twice, the first pass often converts simple 3669 loops into this form. */ 3670 3671 if (REG_P (initial_value)) 3672 { 3673 rtx reg1; 3674 rtx reg2; 3675 rtx const2; 3676 3677 reg1 = initial_value; 3678 if (GET_CODE (final_value) == PLUS) 3679 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1); 3680 else 3681 reg2 = final_value, const2 = const0_rtx; 3682 3683 /* Check for initial_value = reg1, final_value = reg2 + const2, 3684 where reg1 != reg2. 
*/ 3685 if (REG_P (reg2) && reg2 != reg1) 3686 { 3687 rtx temp; 3688 3689 /* Find what reg1 is equivalent to. Hopefully it will 3690 either be reg2 or reg2 plus a constant. */ 3691 temp = loop_find_equiv_value (loop, reg1); 3692 3693 if (find_common_reg_term (temp, reg2)) 3694 initial_value = temp; 3695 else if (loop_invariant_p (loop, reg2)) 3696 { 3697 /* Find what reg2 is equivalent to. Hopefully it will 3698 either be reg1 or reg1 plus a constant. Let's ignore 3699 the latter case for now since it is not so common. */ 3700 temp = loop_find_equiv_value (loop, reg2); 3701 3702 if (temp == loop_info->iteration_var) 3703 temp = initial_value; 3704 if (temp == reg1) 3705 final_value = (const2 == const0_rtx) 3706 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2); 3707 } 3708 } 3709 else if (loop->vtop && GET_CODE (reg2) == CONST_INT) 3710 { 3711 rtx temp; 3712 3713 /* When running the loop optimizer twice, check_dbra_loop 3714 further obfuscates reversible loops of the form: 3715 for (i = init; i < init + const; i++). We often end up with 3716 final_value = 0, initial_value = temp, temp = temp2 - init, 3717 where temp2 = init + const. If the loop has a vtop we 3718 can replace initial_value with const. */ 3719 3720 temp = loop_find_equiv_value (loop, reg1); 3721 3722 if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0))) 3723 { 3724 rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0)); 3725 3726 if (GET_CODE (temp2) == PLUS 3727 && XEXP (temp2, 0) == XEXP (temp, 1)) 3728 initial_value = XEXP (temp2, 1); 3729 } 3730 } 3731 } 3732 3733 /* If have initial_value = reg + const1 and final_value = reg + 3734 const2, then replace initial_value with const1 and final_value 3735 with const2. This should be safe since we are protected by the 3736 initial comparison before entering the loop if we have a vtop. 3737 For example, a + b < a + c is not equivalent to b < c for all a 3738 when using modulo arithmetic. 3739 3740 ??? 
Without a vtop we could still perform the optimization if we check 3741 the initial and final values carefully. */ 3742 if (loop->vtop 3743 && (reg_term = find_common_reg_term (initial_value, final_value))) 3744 { 3745 initial_value = subtract_reg_term (initial_value, reg_term); 3746 final_value = subtract_reg_term (final_value, reg_term); 3747 } 3748 3749 loop_info->initial_equiv_value = initial_value; 3750 loop_info->final_equiv_value = final_value; 3751 3752 /* For EQ comparison loops, we don't have a valid final value. 3753 Check this now so that we won't leave an invalid value if we 3754 return early for any other reason. */ 3755 if (comparison_code == EQ) 3756 loop_info->final_equiv_value = loop_info->final_value = 0; 3757 3758 if (increment == 0) 3759 { 3760 if (loop_dump_stream) 3761 fprintf (loop_dump_stream, 3762 "Loop iterations: Increment value can't be calculated.\n"); 3763 return 0; 3764 } 3765 3766 if (GET_CODE (increment) != CONST_INT) 3767 { 3768 /* If we have a REG, check to see if REG holds a constant value. */ 3769 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't 3770 clear if it is worthwhile to try to handle such RTL. 
*/ 3771 if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG) 3772 increment = loop_find_equiv_value (loop, increment); 3773 3774 if (GET_CODE (increment) != CONST_INT) 3775 { 3776 if (loop_dump_stream) 3777 { 3778 fprintf (loop_dump_stream, 3779 "Loop iterations: Increment value not constant "); 3780 print_simple_rtl (loop_dump_stream, increment); 3781 fprintf (loop_dump_stream, ".\n"); 3782 } 3783 return 0; 3784 } 3785 loop_info->increment = increment; 3786 } 3787 3788 if (GET_CODE (initial_value) != CONST_INT) 3789 { 3790 if (loop_dump_stream) 3791 { 3792 fprintf (loop_dump_stream, 3793 "Loop iterations: Initial value not constant "); 3794 print_simple_rtl (loop_dump_stream, initial_value); 3795 fprintf (loop_dump_stream, ".\n"); 3796 } 3797 return 0; 3798 } 3799 else if (GET_CODE (final_value) != CONST_INT) 3800 { 3801 if (loop_dump_stream) 3802 { 3803 fprintf (loop_dump_stream, 3804 "Loop iterations: Final value not constant "); 3805 print_simple_rtl (loop_dump_stream, final_value); 3806 fprintf (loop_dump_stream, ".\n"); 3807 } 3808 return 0; 3809 } 3810 else if (comparison_code == EQ) 3811 { 3812 rtx inc_once; 3813 3814 if (loop_dump_stream) 3815 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n"); 3816 3817 inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment), 3818 GET_MODE (iteration_var)); 3819 3820 if (inc_once == final_value) 3821 { 3822 /* The iterator value once through the loop is equal to the 3823 comparision value. Either we have an infinite loop, or 3824 we'll loop twice. 
*/ 3825 if (increment == const0_rtx) 3826 return 0; 3827 loop_info->n_iterations = 2; 3828 } 3829 else 3830 loop_info->n_iterations = 1; 3831 3832 if (GET_CODE (loop_info->initial_value) == CONST_INT) 3833 loop_info->final_value 3834 = gen_int_mode ((INTVAL (loop_info->initial_value) 3835 + loop_info->n_iterations * INTVAL (increment)), 3836 GET_MODE (iteration_var)); 3837 else 3838 loop_info->final_value 3839 = plus_constant (loop_info->initial_value, 3840 loop_info->n_iterations * INTVAL (increment)); 3841 loop_info->final_equiv_value 3842 = gen_int_mode ((INTVAL (initial_value) 3843 + loop_info->n_iterations * INTVAL (increment)), 3844 GET_MODE (iteration_var)); 3845 return loop_info->n_iterations; 3846 } 3847 3848 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */ 3849 if (unsigned_p) 3850 final_larger 3851 = ((unsigned HOST_WIDE_INT) INTVAL (final_value) 3852 > (unsigned HOST_WIDE_INT) INTVAL (initial_value)) 3853 - ((unsigned HOST_WIDE_INT) INTVAL (final_value) 3854 < (unsigned HOST_WIDE_INT) INTVAL (initial_value)); 3855 else 3856 final_larger = (INTVAL (final_value) > INTVAL (initial_value)) 3857 - (INTVAL (final_value) < INTVAL (initial_value)); 3858 3859 if (INTVAL (increment) > 0) 3860 increment_dir = 1; 3861 else if (INTVAL (increment) == 0) 3862 increment_dir = 0; 3863 else 3864 increment_dir = -1; 3865 3866 /* There are 27 different cases: compare_dir = -1, 0, 1; 3867 final_larger = -1, 0, 1; increment_dir = -1, 0, 1. 3868 There are 4 normal cases, 4 reverse cases (where the iteration variable 3869 will overflow before the loop exits), 4 infinite loop cases, and 15 3870 immediate exit (0 or 1 iteration depending on loop type) cases. 3871 Only try to optimize the normal cases. 
*/ 3872 3873 /* (compare_dir/final_larger/increment_dir) 3874 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1) 3875 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1) 3876 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0) 3877 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */ 3878 3879 /* ?? If the meaning of reverse loops (where the iteration variable 3880 will overflow before the loop exits) is undefined, then could 3881 eliminate all of these special checks, and just always assume 3882 the loops are normal/immediate/infinite. Note that this means 3883 the sign of increment_dir does not have to be known. Also, 3884 since it does not really hurt if immediate exit loops or infinite loops 3885 are optimized, then that case could be ignored also, and hence all 3886 loops can be optimized. 3887 3888 According to ANSI Spec, the reverse loop case result is undefined, 3889 because the action on overflow is undefined. 3890 3891 See also the special test for NE loops below. */ 3892 3893 if (final_larger == increment_dir && final_larger != 0 3894 && (final_larger == compare_dir || compare_dir == 0)) 3895 /* Normal case. */ 3896 ; 3897 else 3898 { 3899 if (loop_dump_stream) 3900 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n"); 3901 return 0; 3902 } 3903 3904 /* Calculate the number of iterations, final_value is only an approximation, 3905 so correct for that. Note that abs_diff and n_iterations are 3906 unsigned, because they can be as large as 2^n - 1. 
*/ 3907 3908 inc = INTVAL (increment); 3909 if (inc > 0) 3910 { 3911 abs_diff = INTVAL (final_value) - INTVAL (initial_value); 3912 abs_inc = inc; 3913 } 3914 else if (inc < 0) 3915 { 3916 abs_diff = INTVAL (initial_value) - INTVAL (final_value); 3917 abs_inc = -inc; 3918 } 3919 else 3920 abort (); 3921 3922 /* Given that iteration_var is going to iterate over its own mode, 3923 not HOST_WIDE_INT, disregard higher bits that might have come 3924 into the picture due to sign extension of initial and final 3925 values. */ 3926 abs_diff &= ((unsigned HOST_WIDE_INT) 1 3927 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1) 3928 << 1) - 1; 3929 3930 /* For NE tests, make sure that the iteration variable won't miss 3931 the final value. If abs_diff mod abs_incr is not zero, then the 3932 iteration variable will overflow before the loop exits, and we 3933 can not calculate the number of iterations. */ 3934 if (compare_dir == 0 && (abs_diff % abs_inc) != 0) 3935 return 0; 3936 3937 /* Note that the number of iterations could be calculated using 3938 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to 3939 handle potential overflow of the summation. */ 3940 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0); 3941 return loop_info->n_iterations; 3942 } 3943 3944 /* Replace uses of split bivs with their split pseudo register. This is 3945 for original instructions which remain after loop unrolling without 3946 copying. 
*/ 3947 3948 static rtx 3949 remap_split_bivs (loop, x) 3950 struct loop *loop; 3951 rtx x; 3952 { 3953 struct loop_ivs *ivs = LOOP_IVS (loop); 3954 enum rtx_code code; 3955 int i; 3956 const char *fmt; 3957 3958 if (x == 0) 3959 return x; 3960 3961 code = GET_CODE (x); 3962 switch (code) 3963 { 3964 case SCRATCH: 3965 case PC: 3966 case CC0: 3967 case CONST_INT: 3968 case CONST_DOUBLE: 3969 case CONST: 3970 case SYMBOL_REF: 3971 case LABEL_REF: 3972 return x; 3973 3974 case REG: 3975 #if 0 3976 /* If non-reduced/final-value givs were split, then this would also 3977 have to remap those givs also. */ 3978 #endif 3979 if (REGNO (x) < ivs->n_regs 3980 && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT) 3981 return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg; 3982 break; 3983 3984 default: 3985 break; 3986 } 3987 3988 fmt = GET_RTX_FORMAT (code); 3989 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 3990 { 3991 if (fmt[i] == 'e') 3992 XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i)); 3993 else if (fmt[i] == 'E') 3994 { 3995 int j; 3996 for (j = 0; j < XVECLEN (x, i); j++) 3997 XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j)); 3998 } 3999 } 4000 return x; 4001 } 4002 4003 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g. 4004 FIST_UID is always executed if LAST_UID is), then return 1. Otherwise 4005 return 0. COPY_START is where we can start looking for the insns 4006 FIRST_UID and LAST_UID. COPY_END is where we stop looking for these 4007 insns. 4008 4009 If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID 4010 must dominate LAST_UID. 4011 4012 If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID 4013 may not dominate LAST_UID. 4014 4015 If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID 4016 must dominate LAST_UID. 
*/ 4017 4018 int 4019 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end) 4020 int regno; 4021 int first_uid; 4022 int last_uid; 4023 rtx copy_start; 4024 rtx copy_end; 4025 { 4026 int passed_jump = 0; 4027 rtx p = NEXT_INSN (copy_start); 4028 4029 while (INSN_UID (p) != first_uid) 4030 { 4031 if (GET_CODE (p) == JUMP_INSN) 4032 passed_jump = 1; 4033 /* Could not find FIRST_UID. */ 4034 if (p == copy_end) 4035 return 0; 4036 p = NEXT_INSN (p); 4037 } 4038 4039 /* Verify that FIRST_UID is an insn that entirely sets REGNO. */ 4040 if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno)) 4041 return 0; 4042 4043 /* FIRST_UID is always executed. */ 4044 if (passed_jump == 0) 4045 return 1; 4046 4047 while (INSN_UID (p) != last_uid) 4048 { 4049 /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we 4050 can not be sure that FIRST_UID dominates LAST_UID. */ 4051 if (GET_CODE (p) == CODE_LABEL) 4052 return 0; 4053 /* Could not find LAST_UID, but we reached the end of the loop, so 4054 it must be safe. */ 4055 else if (p == copy_end) 4056 return 1; 4057 p = NEXT_INSN (p); 4058 } 4059 4060 /* FIRST_UID is always executed if LAST_UID is executed. */ 4061 return 1; 4062 } 4063 4064 /* This routine is called when the number of iterations for the unrolled 4065 loop is one. The goal is to identify a loop that begins with an 4066 unconditional branch to the loop continuation note (or a label just after). 4067 In this case, the unconditional branch that starts the loop needs to be 4068 deleted so that we execute the single iteration. */ 4069 4070 static rtx 4071 ujump_to_loop_cont (loop_start, loop_cont) 4072 rtx loop_start; 4073 rtx loop_cont; 4074 { 4075 rtx x, label, label_ref; 4076 4077 /* See if loop start, or the next insn is an unconditional jump. 
*/ 4078 loop_start = next_nonnote_insn (loop_start); 4079 4080 x = pc_set (loop_start); 4081 if (!x) 4082 return NULL_RTX; 4083 4084 label_ref = SET_SRC (x); 4085 if (!label_ref) 4086 return NULL_RTX; 4087 4088 /* Examine insn after loop continuation note. Return if not a label. */ 4089 label = next_nonnote_insn (loop_cont); 4090 if (label == 0 || GET_CODE (label) != CODE_LABEL) 4091 return NULL_RTX; 4092 4093 /* Return the loop start if the branch label matches the code label. */ 4094 if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0))) 4095 return loop_start; 4096 else 4097 return NULL_RTX; 4098 } 4099