1 /* Try to unroll loops, and split induction variables.
2 Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23 /* Try to unroll a loop, and split induction variables.
24
25 Loops for which the number of iterations can be calculated exactly are
26 handled specially. If the number of iterations times the insn_count is
27 less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
28 Otherwise, we try to unroll the loop a number of times modulo the number
29 of iterations, so that only one exit test will be needed. It is unrolled
30 a number of times approximately equal to MAX_UNROLLED_INSNS divided by
31 the insn count.
32
33 Otherwise, if the number of iterations can be calculated exactly at
34 run time, and the loop is always entered at the top, then we try to
35 precondition the loop. That is, at run time, calculate how many times
36 the loop will execute, and then execute the loop body a few times so
37 that the remaining iterations will be some multiple of 4 (or 2 if the
38 loop is large). Then fall through to a loop unrolled 4 (or 2) times,
39 with only one exit test needed at the end of the loop.
40
41 Otherwise, if the number of iterations can not be calculated exactly,
42 not even at run time, then we still unroll the loop a number of times
43 approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
44 but there must be an exit test after each copy of the loop body.
45
46 For each induction variable, which is dead outside the loop (replaceable)
47 or for which we can easily calculate the final value, if we can easily
48 calculate its value at each place where it is set as a function of the
49 current loop unroll count and the variable's value at loop entry, then
50 the induction variable is split into `N' different variables, one for
51 each copy of the loop body. One variable is live across the backward
52 branch, and the others are all calculated as a function of this variable.
53 This helps eliminate data dependencies, and leads to further opportunities
54 for cse. */
55
56 /* Possible improvements follow: */
57
58 /* ??? Add an extra pass somewhere to determine whether unrolling will
59 give any benefit. E.g. after generating all unrolled insns, compute the
60 cost of all insns and compare against cost of insns in rolled loop.
61
62 - On traditional architectures, unrolling a non-constant bound loop
63 is a win if there is a giv whose only use is in memory addresses, the
64 memory addresses can be split, and hence giv increments can be
65 eliminated.
66 - It is also a win if the loop is executed many times, and preconditioning
67 can be performed for the loop.
68 Add code to check for these and similar cases. */
69
70 /* ??? Improve control of which loops get unrolled. Could use profiling
71 info to only unroll the most commonly executed loops. Perhaps have
   a user specifiable option to control the amount of code expansion,
73 or the percent of loops to consider for unrolling. Etc. */
74
75 /* ??? Look at the register copies inside the loop to see if they form a
76 simple permutation. If so, iterate the permutation until it gets back to
77 the start state. This is how many times we should unroll the loop, for
78 best results, because then all register copies can be eliminated.
79 For example, the lisp nreverse function should be unrolled 3 times
80 while (this)
81 {
82 next = this->cdr;
83 this->cdr = prev;
84 prev = this;
85 this = next;
86 }
87
88 ??? The number of times to unroll the loop may also be based on data
89 references in the loop. For example, if we have a loop that references
90 x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
91
92 /* ??? Add some simple linear equation solving capability so that we can
93 determine the number of loop iterations for more complex loops.
94 For example, consider this loop from gdb
95 #define SWAP_TARGET_AND_HOST(buffer,len)
96 {
97 char tmp;
98 char *p = (char *) buffer;
99 char *q = ((char *) buffer) + len - 1;
100 int iterations = (len + 1) >> 1;
101 int i;
          for (; p < q; p++, q--)
103 {
104 tmp = *q;
105 *q = *p;
106 *p = tmp;
107 }
108 }
109 Note that:
110 start value = p = &buffer + current_iteration
111 end value = q = &buffer + len - 1 - current_iteration
   Given the loop exit test of "p < q", the loop runs while q - p > 0.
   Set q - p equal to zero and solve for the number of iterations:
114 q - p = len - 1 - 2*current_iteration = 0
115 current_iteration = (len - 1) / 2
116 Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
117 iterations of this loop. */
118
119 /* ??? Currently, no labels are marked as loop invariant when doing loop
120 unrolling. This is because an insn inside the loop, that loads the address
121 of a label inside the loop into a register, could be moved outside the loop
122 by the invariant code motion pass if labels were invariant. If the loop
123 is subsequently unrolled, the code will be wrong because each unrolled
124 body of the loop will use the same address, whereas each actually needs a
125 different address. A case where this happens is when a loop containing
126 a switch statement is unrolled.
127
128 It would be better to let labels be considered invariant. When we
129 unroll loops here, check to see if any insns using a label local to the
130 loop were moved before the loop. If so, then correct the problem, by
131 moving the insn back into the loop, or perhaps replicate the insn before
132 the loop, one copy for each time the loop is unrolled. */
133
134 #include "config.h"
135 #include "system.h"
136 #include "rtl.h"
137 #include "tm_p.h"
138 #include "insn-config.h"
139 #include "integrate.h"
140 #include "regs.h"
141 #include "recog.h"
142 #include "flags.h"
143 #include "function.h"
144 #include "expr.h"
145 #include "loop.h"
146 #include "toplev.h"
147 #include "hard-reg-set.h"
148 #include "basic-block.h"
149 #include "predict.h"
150 #include "params.h"
151
152 /* The prime factors looked for when trying to unroll a loop by some
153 number which is modulo the total number of iterations. Just checking
154 for these 4 prime factors will find at least one factor for 75% of
155 all numbers theoretically. Practically speaking, this will succeed
156 almost all of the time since loops are generally a multiple of 2
157 and/or 5. */
158
/* Number of candidate prime factors tested when computing a modulo
   unroll factor.  */
#define NUM_FACTORS 4

/* One entry per candidate prime: the prime itself and how many times
   it divides the loop's iteration count (the count field is filled in
   each time a loop is analyzed).  */
struct _factor
{
  const int factor;
  int count;
};

static struct _factor factors[NUM_FACTORS]
  = {{2, 0}, {3, 0}, {5, 0}, {7, 0}};
163
164 /* Describes the different types of loop unrolling performed. */
165
enum unroll_types
{
  UNROLL_COMPLETELY,	/* Iteration count known and small enough: emit every
			   iteration and delete the exit test entirely.  */
  UNROLL_MODULO,	/* Unroll by a factor of the known iteration count, so
			   only the single original exit test is needed.  */
  UNROLL_NAIVE		/* Iteration count unknown: unroll by a size-based
			   heuristic, keeping an exit test after each copy of
			   the loop body.  */
};
172
/* Indexed by register number, if nonzero, then it contains a pointer
   to a struct induction for a DEST_REG giv which has been combined with
   one or more address givs.  This is needed because whenever such a DEST_REG
   giv is modified, we must modify the value of all split address givs
   that were combined with this DEST_REG giv.  */

static struct induction **addr_combined_regs;

/* Indexed by register number, if this is a splittable induction variable,
   then this will hold the current value of the register, which depends on the
   iteration number.  */

static rtx *splittable_regs;

/* Indexed by register number, if this is a splittable induction variable,
   then this will hold the number of instructions in the loop that modify
   the induction variable.  Used to ensure that only the last insn modifying
   a split iv will update the original iv of the dest.  */

static int *splittable_regs_updates;
193
194 /* Forward declarations. */
195
196 static rtx simplify_cmp_and_jump_insns PARAMS ((enum rtx_code,
197 enum machine_mode,
198 rtx, rtx, rtx));
199 static void init_reg_map PARAMS ((struct inline_remap *, int));
200 static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int));
201 static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *));
202 static void final_reg_note_copy PARAMS ((rtx *, struct inline_remap *));
203 static void copy_loop_body PARAMS ((struct loop *, rtx, rtx,
204 struct inline_remap *, rtx, int,
205 enum unroll_types, rtx, rtx, rtx, rtx));
206 static int find_splittable_regs PARAMS ((const struct loop *,
207 enum unroll_types, int));
208 static int find_splittable_givs PARAMS ((const struct loop *,
209 struct iv_class *, enum unroll_types,
210 rtx, int));
211 static int reg_dead_after_loop PARAMS ((const struct loop *, rtx));
212 static rtx fold_rtx_mult_add PARAMS ((rtx, rtx, rtx, enum machine_mode));
213 static rtx remap_split_bivs PARAMS ((struct loop *, rtx));
214 static rtx find_common_reg_term PARAMS ((rtx, rtx));
215 static rtx subtract_reg_term PARAMS ((rtx, rtx));
216 static rtx loop_find_equiv_value PARAMS ((const struct loop *, rtx));
217 static rtx ujump_to_loop_cont PARAMS ((rtx, rtx));
218
219 /* Try to unroll one loop and split induction variables in the loop.
220
221 The loop is described by the arguments LOOP and INSN_COUNT.
222 STRENGTH_REDUCTION_P indicates whether information generated in the
223 strength reduction pass is available.
224
225 This function is intended to be called from within `strength_reduce'
226 in loop.c. */
227
228 void
unroll_loop(loop,insn_count,strength_reduce_p)229 unroll_loop (loop, insn_count, strength_reduce_p)
230 struct loop *loop;
231 int insn_count;
232 int strength_reduce_p;
233 {
234 struct loop_info *loop_info = LOOP_INFO (loop);
235 struct loop_ivs *ivs = LOOP_IVS (loop);
236 int i, j;
237 unsigned int r;
238 unsigned HOST_WIDE_INT temp;
239 int unroll_number = 1;
240 rtx copy_start, copy_end;
241 rtx insn, sequence, pattern, tem;
242 int max_labelno, max_insnno;
243 rtx insert_before;
244 struct inline_remap *map;
245 char *local_label = NULL;
246 char *local_regno;
247 unsigned int max_local_regnum;
248 unsigned int maxregnum;
249 rtx exit_label = 0;
250 rtx start_label;
251 struct iv_class *bl;
252 int splitting_not_safe = 0;
253 enum unroll_types unroll_type = UNROLL_NAIVE;
254 int loop_preconditioned = 0;
255 rtx safety_label;
256 /* This points to the last real insn in the loop, which should be either
257 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
258 jumps). */
259 rtx last_loop_insn;
260 rtx loop_start = loop->start;
261 rtx loop_end = loop->end;
262
263 /* Don't bother unrolling huge loops. Since the minimum factor is
264 two, loops greater than one half of MAX_UNROLLED_INSNS will never
265 be unrolled. */
266 if (insn_count > MAX_UNROLLED_INSNS / 2)
267 {
268 if (loop_dump_stream)
269 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
270 return;
271 }
272
273 /* Determine type of unroll to perform. Depends on the number of iterations
274 and the size of the loop. */
275
276 /* If there is no strength reduce info, then set
277 loop_info->n_iterations to zero. This can happen if
278 strength_reduce can't find any bivs in the loop. A value of zero
279 indicates that the number of iterations could not be calculated. */
280
281 if (! strength_reduce_p)
282 loop_info->n_iterations = 0;
283
284 if (loop_dump_stream && loop_info->n_iterations > 0)
285 {
286 fputs ("Loop unrolling: ", loop_dump_stream);
287 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
288 loop_info->n_iterations);
289 fputs (" iterations.\n", loop_dump_stream);
290 }
291
292 /* Find and save a pointer to the last nonnote insn in the loop. */
293
294 last_loop_insn = prev_nonnote_insn (loop_end);
295
296 /* Calculate how many times to unroll the loop. Indicate whether or
297 not the loop is being completely unrolled. */
298
299 if (loop_info->n_iterations == 1)
300 {
301 /* Handle the case where the loop begins with an unconditional
302 jump to the loop condition. Make sure to delete the jump
303 insn, otherwise the loop body will never execute. */
304
305 /* FIXME this actually checks for a jump to the continue point, which
306 is not the same as the condition in a for loop. As a result, this
307 optimization fails for most for loops. We should really use flow
308 information rather than instruction pattern matching. */
309 rtx ujump = ujump_to_loop_cont (loop->start, loop->cont);
310
311 /* If number of iterations is exactly 1, then eliminate the compare and
312 branch at the end of the loop since they will never be taken.
313 Then return, since no other action is needed here. */
314
315 /* If the last instruction is not a BARRIER or a JUMP_INSN, then
316 don't do anything. */
317
318 if (GET_CODE (last_loop_insn) == BARRIER)
319 {
320 /* Delete the jump insn. This will delete the barrier also. */
321 last_loop_insn = PREV_INSN (last_loop_insn);
322 }
323
324 if (ujump && GET_CODE (last_loop_insn) == JUMP_INSN)
325 {
326 #ifdef HAVE_cc0
327 rtx prev = PREV_INSN (last_loop_insn);
328 #endif
329 delete_related_insns (last_loop_insn);
330 #ifdef HAVE_cc0
331 /* The immediately preceding insn may be a compare which must be
332 deleted. */
333 if (only_sets_cc0_p (prev))
334 delete_related_insns (prev);
335 #endif
336
337 delete_related_insns (ujump);
338
339 /* Remove the loop notes since this is no longer a loop. */
340 if (loop->vtop)
341 delete_related_insns (loop->vtop);
342 if (loop->cont)
343 delete_related_insns (loop->cont);
344 if (loop_start)
345 delete_related_insns (loop_start);
346 if (loop_end)
347 delete_related_insns (loop_end);
348
349 return;
350 }
351 }
352
353 if (loop_info->n_iterations > 0
354 /* Avoid overflow in the next expression. */
355 && loop_info->n_iterations < (unsigned) MAX_UNROLLED_INSNS
356 && loop_info->n_iterations * insn_count < (unsigned) MAX_UNROLLED_INSNS)
357 {
358 unroll_number = loop_info->n_iterations;
359 unroll_type = UNROLL_COMPLETELY;
360 }
361 else if (loop_info->n_iterations > 0)
362 {
363 /* Try to factor the number of iterations. Don't bother with the
364 general case, only using 2, 3, 5, and 7 will get 75% of all
365 numbers theoretically, and almost all in practice. */
366
367 for (i = 0; i < NUM_FACTORS; i++)
368 factors[i].count = 0;
369
370 temp = loop_info->n_iterations;
371 for (i = NUM_FACTORS - 1; i >= 0; i--)
372 while (temp % factors[i].factor == 0)
373 {
374 factors[i].count++;
375 temp = temp / factors[i].factor;
376 }
377
378 /* Start with the larger factors first so that we generally
379 get lots of unrolling. */
380
381 unroll_number = 1;
382 temp = insn_count;
383 for (i = 3; i >= 0; i--)
384 while (factors[i].count--)
385 {
386 if (temp * factors[i].factor < (unsigned) MAX_UNROLLED_INSNS)
387 {
388 unroll_number *= factors[i].factor;
389 temp *= factors[i].factor;
390 }
391 else
392 break;
393 }
394
395 /* If we couldn't find any factors, then unroll as in the normal
396 case. */
397 if (unroll_number == 1)
398 {
399 if (loop_dump_stream)
400 fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n");
401 }
402 else
403 unroll_type = UNROLL_MODULO;
404 }
405
406 /* Default case, calculate number of times to unroll loop based on its
407 size. */
408 if (unroll_type == UNROLL_NAIVE)
409 {
410 if (8 * insn_count < MAX_UNROLLED_INSNS)
411 unroll_number = 8;
412 else if (4 * insn_count < MAX_UNROLLED_INSNS)
413 unroll_number = 4;
414 else
415 unroll_number = 2;
416 }
417
418 /* Now we know how many times to unroll the loop. */
419
420 if (loop_dump_stream)
421 fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number);
422
423 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
424 {
425 /* Loops of these types can start with jump down to the exit condition
426 in rare circumstances.
427
428 Consider a pair of nested loops where the inner loop is part
429 of the exit code for the outer loop.
430
431 In this case jump.c will not duplicate the exit test for the outer
432 loop, so it will start with a jump to the exit code.
433
434 Then consider if the inner loop turns out to iterate once and
435 only once. We will end up deleting the jumps associated with
436 the inner loop. However, the loop notes are not removed from
437 the instruction stream.
438
439 And finally assume that we can compute the number of iterations
440 for the outer loop.
441
442 In this case unroll may want to unroll the outer loop even though
443 it starts with a jump to the outer loop's exit code.
444
445 We could try to optimize this case, but it hardly seems worth it.
446 Just return without unrolling the loop in such cases. */
447
448 insn = loop_start;
449 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
450 insn = NEXT_INSN (insn);
451 if (GET_CODE (insn) == JUMP_INSN)
452 return;
453 }
454
455 if (unroll_type == UNROLL_COMPLETELY)
456 {
457 /* Completely unrolling the loop: Delete the compare and branch at
458 the end (the last two instructions). This delete must done at the
459 very end of loop unrolling, to avoid problems with calls to
460 back_branch_in_range_p, which is called by find_splittable_regs.
461 All increments of splittable bivs/givs are changed to load constant
462 instructions. */
463
464 copy_start = loop_start;
465
466 /* Set insert_before to the instruction immediately after the JUMP_INSN
467 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
468 the loop will be correctly handled by copy_loop_body. */
469 insert_before = NEXT_INSN (last_loop_insn);
470
471 /* Set copy_end to the insn before the jump at the end of the loop. */
472 if (GET_CODE (last_loop_insn) == BARRIER)
473 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
474 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
475 {
476 copy_end = PREV_INSN (last_loop_insn);
477 #ifdef HAVE_cc0
478 /* The instruction immediately before the JUMP_INSN may be a compare
479 instruction which we do not want to copy. */
480 if (sets_cc0_p (PREV_INSN (copy_end)))
481 copy_end = PREV_INSN (copy_end);
482 #endif
483 }
484 else
485 {
486 /* We currently can't unroll a loop if it doesn't end with a
487 JUMP_INSN. There would need to be a mechanism that recognizes
488 this case, and then inserts a jump after each loop body, which
489 jumps to after the last loop body. */
490 if (loop_dump_stream)
491 fprintf (loop_dump_stream,
492 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
493 return;
494 }
495 }
496 else if (unroll_type == UNROLL_MODULO)
497 {
498 /* Partially unrolling the loop: The compare and branch at the end
499 (the last two instructions) must remain. Don't copy the compare
500 and branch instructions at the end of the loop. Insert the unrolled
501 code immediately before the compare/branch at the end so that the
502 code will fall through to them as before. */
503
504 copy_start = loop_start;
505
506 /* Set insert_before to the jump insn at the end of the loop.
507 Set copy_end to before the jump insn at the end of the loop. */
508 if (GET_CODE (last_loop_insn) == BARRIER)
509 {
510 insert_before = PREV_INSN (last_loop_insn);
511 copy_end = PREV_INSN (insert_before);
512 }
513 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
514 {
515 insert_before = last_loop_insn;
516 #ifdef HAVE_cc0
517 /* The instruction immediately before the JUMP_INSN may be a compare
518 instruction which we do not want to copy or delete. */
519 if (sets_cc0_p (PREV_INSN (insert_before)))
520 insert_before = PREV_INSN (insert_before);
521 #endif
522 copy_end = PREV_INSN (insert_before);
523 }
524 else
525 {
526 /* We currently can't unroll a loop if it doesn't end with a
527 JUMP_INSN. There would need to be a mechanism that recognizes
528 this case, and then inserts a jump after each loop body, which
529 jumps to after the last loop body. */
530 if (loop_dump_stream)
531 fprintf (loop_dump_stream,
532 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
533 return;
534 }
535 }
536 else
537 {
538 /* Normal case: Must copy the compare and branch instructions at the
539 end of the loop. */
540
541 if (GET_CODE (last_loop_insn) == BARRIER)
542 {
543 /* Loop ends with an unconditional jump and a barrier.
544 Handle this like above, don't copy jump and barrier.
545 This is not strictly necessary, but doing so prevents generating
546 unconditional jumps to an immediately following label.
547
548 This will be corrected below if the target of this jump is
549 not the start_label. */
550
551 insert_before = PREV_INSN (last_loop_insn);
552 copy_end = PREV_INSN (insert_before);
553 }
554 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
555 {
556 /* Set insert_before to immediately after the JUMP_INSN, so that
557 NOTEs at the end of the loop will be correctly handled by
558 copy_loop_body. */
559 insert_before = NEXT_INSN (last_loop_insn);
560 copy_end = last_loop_insn;
561 }
562 else
563 {
564 /* We currently can't unroll a loop if it doesn't end with a
565 JUMP_INSN. There would need to be a mechanism that recognizes
566 this case, and then inserts a jump after each loop body, which
567 jumps to after the last loop body. */
568 if (loop_dump_stream)
569 fprintf (loop_dump_stream,
570 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
571 return;
572 }
573
574 /* If copying exit test branches because they can not be eliminated,
575 then must convert the fall through case of the branch to a jump past
576 the end of the loop. Create a label to emit after the loop and save
577 it for later use. Do not use the label after the loop, if any, since
578 it might be used by insns outside the loop, or there might be insns
579 added before it later by final_[bg]iv_value which must be after
580 the real exit label. */
581 exit_label = gen_label_rtx ();
582
583 insn = loop_start;
584 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
585 insn = NEXT_INSN (insn);
586
587 if (GET_CODE (insn) == JUMP_INSN)
588 {
589 /* The loop starts with a jump down to the exit condition test.
590 Start copying the loop after the barrier following this
591 jump insn. */
592 copy_start = NEXT_INSN (insn);
593
594 /* Splitting induction variables doesn't work when the loop is
595 entered via a jump to the bottom, because then we end up doing
596 a comparison against a new register for a split variable, but
597 we did not execute the set insn for the new register because
598 it was skipped over. */
599 splitting_not_safe = 1;
600 if (loop_dump_stream)
601 fprintf (loop_dump_stream,
602 "Splitting not safe, because loop not entered at top.\n");
603 }
604 else
605 copy_start = loop_start;
606 }
607
608 /* This should always be the first label in the loop. */
609 start_label = NEXT_INSN (copy_start);
610 /* There may be a line number note and/or a loop continue note here. */
611 while (GET_CODE (start_label) == NOTE)
612 start_label = NEXT_INSN (start_label);
613 if (GET_CODE (start_label) != CODE_LABEL)
614 {
615 /* This can happen as a result of jump threading. If the first insns in
616 the loop test the same condition as the loop's backward jump, or the
617 opposite condition, then the backward jump will be modified to point
618 to elsewhere, and the loop's start label is deleted.
619
620 This case currently can not be handled by the loop unrolling code. */
621
622 if (loop_dump_stream)
623 fprintf (loop_dump_stream,
624 "Unrolling failure: unknown insns between BEG note and loop label.\n");
625 return;
626 }
627 if (LABEL_NAME (start_label))
628 {
629 /* The jump optimization pass must have combined the original start label
630 with a named label for a goto. We can't unroll this case because
631 jumps which go to the named label must be handled differently than
632 jumps to the loop start, and it is impossible to differentiate them
633 in this case. */
634 if (loop_dump_stream)
635 fprintf (loop_dump_stream,
636 "Unrolling failure: loop start label is gone\n");
637 return;
638 }
639
640 if (unroll_type == UNROLL_NAIVE
641 && GET_CODE (last_loop_insn) == BARRIER
642 && GET_CODE (PREV_INSN (last_loop_insn)) == JUMP_INSN
643 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
644 {
645 /* In this case, we must copy the jump and barrier, because they will
646 not be converted to jumps to an immediately following label. */
647
648 insert_before = NEXT_INSN (last_loop_insn);
649 copy_end = last_loop_insn;
650 }
651
652 if (unroll_type == UNROLL_NAIVE
653 && GET_CODE (last_loop_insn) == JUMP_INSN
654 && start_label != JUMP_LABEL (last_loop_insn))
655 {
656 /* ??? The loop ends with a conditional branch that does not branch back
657 to the loop start label. In this case, we must emit an unconditional
658 branch to the loop exit after emitting the final branch.
659 copy_loop_body does not have support for this currently, so we
660 give up. It doesn't seem worthwhile to unroll anyways since
661 unrolling would increase the number of branch instructions
662 executed. */
663 if (loop_dump_stream)
664 fprintf (loop_dump_stream,
665 "Unrolling failure: final conditional branch not to loop start\n");
666 return;
667 }
668
669 /* Allocate a translation table for the labels and insn numbers.
670 They will be filled in as we copy the insns in the loop. */
671
672 max_labelno = max_label_num ();
673 max_insnno = get_max_uid ();
674
675 /* Various paths through the unroll code may reach the "egress" label
676 without initializing fields within the map structure.
677
678 To be safe, we use xcalloc to zero the memory. */
679 map = (struct inline_remap *) xcalloc (1, sizeof (struct inline_remap));
680
681 /* Allocate the label map. */
682
683 if (max_labelno > 0)
684 {
685 map->label_map = (rtx *) xcalloc (max_labelno, sizeof (rtx));
686 local_label = (char *) xcalloc (max_labelno, sizeof (char));
687 }
688
689 /* Search the loop and mark all local labels, i.e. the ones which have to
690 be distinct labels when copied. For all labels which might be
691 non-local, set their label_map entries to point to themselves.
692 If they happen to be local their label_map entries will be overwritten
693 before the loop body is copied. The label_map entries for local labels
694 will be set to a different value each time the loop body is copied. */
695
696 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
697 {
698 rtx note;
699
700 if (GET_CODE (insn) == CODE_LABEL)
701 local_label[CODE_LABEL_NUMBER (insn)] = 1;
702 else if (GET_CODE (insn) == JUMP_INSN)
703 {
704 if (JUMP_LABEL (insn))
705 set_label_in_map (map,
706 CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
707 JUMP_LABEL (insn));
708 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
709 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
710 {
711 rtx pat = PATTERN (insn);
712 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
713 int len = XVECLEN (pat, diff_vec_p);
714 rtx label;
715
716 for (i = 0; i < len; i++)
717 {
718 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
719 set_label_in_map (map, CODE_LABEL_NUMBER (label), label);
720 }
721 }
722 }
723 if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)))
724 set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)),
725 XEXP (note, 0));
726 }
727
728 /* Allocate space for the insn map. */
729
730 map->insn_map = (rtx *) xmalloc (max_insnno * sizeof (rtx));
731
732 /* Set this to zero, to indicate that we are doing loop unrolling,
733 not function inlining. */
734 map->inline_target = 0;
735
736 /* The register and constant maps depend on the number of registers
737 present, so the final maps can't be created until after
738 find_splittable_regs is called. However, they are needed for
739 preconditioning, so we create temporary maps when preconditioning
740 is performed. */
741
742 /* The preconditioning code may allocate two new pseudo registers. */
743 maxregnum = max_reg_num ();
744
745 /* local_regno is only valid for regnos < max_local_regnum. */
746 max_local_regnum = maxregnum;
747
748 /* Allocate and zero out the splittable_regs and addr_combined_regs
749 arrays. These must be zeroed here because they will be used if
750 loop preconditioning is performed, and must be zero for that case.
751
752 It is safe to do this here, since the extra registers created by the
753 preconditioning code and find_splittable_regs will never be used
754 to access the splittable_regs[] and addr_combined_regs[] arrays. */
755
756 splittable_regs = (rtx *) xcalloc (maxregnum, sizeof (rtx));
757 splittable_regs_updates = (int *) xcalloc (maxregnum, sizeof (int));
758 addr_combined_regs
759 = (struct induction **) xcalloc (maxregnum, sizeof (struct induction *));
760 local_regno = (char *) xcalloc (maxregnum, sizeof (char));
761
762 /* Mark all local registers, i.e. the ones which are referenced only
763 inside the loop. */
764 if (INSN_UID (copy_end) < max_uid_for_loop)
765 {
766 int copy_start_luid = INSN_LUID (copy_start);
767 int copy_end_luid = INSN_LUID (copy_end);
768
769 /* If a register is used in the jump insn, we must not duplicate it
770 since it will also be used outside the loop. */
771 if (GET_CODE (copy_end) == JUMP_INSN)
772 copy_end_luid--;
773
774 /* If we have a target that uses cc0, then we also must not duplicate
775 the insn that sets cc0 before the jump insn, if one is present. */
776 #ifdef HAVE_cc0
777 if (GET_CODE (copy_end) == JUMP_INSN
778 && sets_cc0_p (PREV_INSN (copy_end)))
779 copy_end_luid--;
780 #endif
781
782 /* If copy_start points to the NOTE that starts the loop, then we must
783 use the next luid, because invariant pseudo-regs moved out of the loop
784 have their lifetimes modified to start here, but they are not safe
785 to duplicate. */
786 if (copy_start == loop_start)
787 copy_start_luid++;
788
789 /* If a pseudo's lifetime is entirely contained within this loop, then we
790 can use a different pseudo in each unrolled copy of the loop. This
791 results in better code. */
792 /* We must limit the generic test to max_reg_before_loop, because only
793 these pseudo registers have valid regno_first_uid info. */
794 for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r)
795 if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) < max_uid_for_loop
796 && REGNO_FIRST_LUID (r) >= copy_start_luid
797 /* See the comment in reg_scan_mark_refs on the relationship between
798 last_uid and last_note_uid. */
799 && REGNO_LAST_UID (r) > 0 && REGNO_LAST_UID (r) < max_uid_for_loop
800 && REGNO_LAST_LUID (r) <= copy_end_luid
801 && REGNO_LAST_NOTE_UID (r) > 0 && REGNO_LAST_NOTE_UID (r) < max_uid_for_loop
802 && REGNO_LAST_NOTE_LUID (r) <= copy_end_luid)
803 {
804 /* However, we must also check for loop-carried dependencies.
805 If the value the pseudo has at the end of iteration X is
806 used by iteration X+1, then we can not use a different pseudo
807 for each unrolled copy of the loop. */
808 /* A pseudo is safe if regno_first_uid is a set, and this
809 set dominates all instructions from regno_first_uid to
810 regno_last_uid. */
811 /* ??? This check is simplistic. We would get better code if
812 this check was more sophisticated. */
813 if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r),
814 copy_start, copy_end))
815 local_regno[r] = 1;
816
817 if (loop_dump_stream)
818 {
819 if (local_regno[r])
820 fprintf (loop_dump_stream, "Marked reg %d as local\n", r);
821 else
822 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
823 r);
824 }
825 }
826 }
827
828 /* If this loop requires exit tests when unrolled, check to see if we
829 can precondition the loop so as to make the exit tests unnecessary.
830 Just like variable splitting, this is not safe if the loop is entered
831 via a jump to the bottom. Also, can not do this if no strength
832 reduce info, because precondition_loop_p uses this info. */
833
834 /* Must copy the loop body for preconditioning before the following
835 find_splittable_regs call since that will emit insns which need to
836 be after the preconditioned loop copies, but immediately before the
837 unrolled loop copies. */
838
839 /* Also, it is not safe to split induction variables for the preconditioned
840 copies of the loop body. If we split induction variables, then the code
841 assumes that each induction variable can be represented as a function
842 of its initial value and the loop iteration number. This is not true
843 in this case, because the last preconditioned copy of the loop body
844 could be any iteration from the first up to the `unroll_number-1'th,
845 depending on the initial value of the iteration variable. Therefore
846 we can not split induction variables here, because we can not calculate
847 their value. Hence, this code must occur before find_splittable_regs
848 is called. */
849
850 if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
851 {
852 rtx initial_value, final_value, increment;
853 enum machine_mode mode;
854
855 if (precondition_loop_p (loop,
856 &initial_value, &final_value, &increment,
857 &mode))
858 {
859 rtx diff, insn;
860 rtx *labels;
861 int abs_inc, neg_inc;
862 enum rtx_code cc = loop_info->comparison_code;
863 int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU);
864 int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU);
865
866 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
867
868 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum,
869 "unroll_loop_precondition");
870 global_const_equiv_varray = map->const_equiv_varray;
871
872 init_reg_map (map, maxregnum);
873
874 /* Limit loop unrolling to 4, since this will make 7 copies of
875 the loop body. */
876 if (unroll_number > 4)
877 unroll_number = 4;
878
879 /* Save the absolute value of the increment, and also whether or
880 not it is negative. */
881 neg_inc = 0;
882 abs_inc = INTVAL (increment);
883 if (abs_inc < 0)
884 {
885 abs_inc = -abs_inc;
886 neg_inc = 1;
887 }
888
889 start_sequence ();
890
891 /* We must copy the final and initial values here to avoid
892 improperly shared rtl. */
893 final_value = copy_rtx (final_value);
894 initial_value = copy_rtx (initial_value);
895
896 /* Final value may have form of (PLUS val1 const1_rtx). We need
897 to convert it into general operand, so compute the real value. */
898
899 final_value = force_operand (final_value, NULL_RTX);
900 if (!nonmemory_operand (final_value, VOIDmode))
901 final_value = force_reg (mode, final_value);
902
903 /* Calculate the difference between the final and initial values.
904 Final value may be a (plus (reg x) (const_int 1)) rtx.
905
906 We have to deal with for (i = 0; --i < 6;) type loops.
907 For such loops the real final value is the first time the
908 loop variable overflows, so the diff we calculate is the
909 distance from the overflow value. This is 0 or ~0 for
910 unsigned loops depending on the direction, or INT_MAX,
911 INT_MAX+1 for signed loops. We really do not need the
912 exact value, since we are only interested in the diff
913 modulo the increment, and the increment is a power of 2,
914 so we can pretend that the overflow value is 0/~0. */
915
916 if (cc == NE || less_p != neg_inc)
917 diff = simplify_gen_binary (MINUS, mode, final_value,
918 initial_value);
919 else
920 diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode,
921 initial_value, mode);
922 diff = force_operand (diff, NULL_RTX);
923
924 /* Now calculate (diff % (unroll * abs (increment))) by using an
925 and instruction. */
926 diff = simplify_gen_binary (AND, mode, diff,
927 GEN_INT (unroll_number*abs_inc - 1));
928 diff = force_operand (diff, NULL_RTX);
929
930 /* Now emit a sequence of branches to jump to the proper precond
931 loop entry point. */
932
933 labels = (rtx *) xmalloc (sizeof (rtx) * unroll_number);
934 for (i = 0; i < unroll_number; i++)
935 labels[i] = gen_label_rtx ();
936
937 /* Check for the case where the initial value is greater than or
938 equal to the final value. In that case, we want to execute
939 exactly one loop iteration. The code below will fail for this
940 case. This check does not apply if the loop has a NE
941 comparison at the end. */
942
943 if (cc != NE)
944 {
945 rtx incremented_initval;
946 enum rtx_code cmp_code;
947
948 incremented_initval
949 = simplify_gen_binary (PLUS, mode, initial_value, increment);
950 incremented_initval
951 = force_operand (incremented_initval, NULL_RTX);
952
953 cmp_code = (less_p
954 ? (unsigned_p ? GEU : GE)
955 : (unsigned_p ? LEU : LE));
956
957 insn = simplify_cmp_and_jump_insns (cmp_code, mode,
958 incremented_initval,
959 final_value, labels[1]);
960 if (insn)
961 predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN);
962 }
963
964 /* Assuming the unroll_number is 4, and the increment is 2, then
965 for a negative increment: for a positive increment:
966 diff = 0,1 precond 0 diff = 0,7 precond 0
967 diff = 2,3 precond 3 diff = 1,2 precond 1
968 diff = 4,5 precond 2 diff = 3,4 precond 2
969 diff = 6,7 precond 1 diff = 5,6 precond 3 */
970
971 /* We only need to emit (unroll_number - 1) branches here, the
972 last case just falls through to the following code. */
973
974 /* ??? This would give better code if we emitted a tree of branches
975 instead of the current linear list of branches. */
976
977 for (i = 0; i < unroll_number - 1; i++)
978 {
979 int cmp_const;
980 enum rtx_code cmp_code;
981
982 /* For negative increments, must invert the constant compared
983 against, except when comparing against zero. */
984 if (i == 0)
985 {
986 cmp_const = 0;
987 cmp_code = EQ;
988 }
989 else if (neg_inc)
990 {
991 cmp_const = unroll_number - i;
992 cmp_code = GE;
993 }
994 else
995 {
996 cmp_const = i;
997 cmp_code = LE;
998 }
999
1000 insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff,
1001 GEN_INT (abs_inc*cmp_const),
1002 labels[i]);
1003 if (insn)
1004 predict_insn (insn, PRED_LOOP_PRECONDITIONING,
1005 REG_BR_PROB_BASE / (unroll_number - i));
1006 }
1007
1008 /* If the increment is greater than one, then we need another branch,
1009 to handle other cases equivalent to 0. */
1010
1011 /* ??? This should be merged into the code above somehow to help
1012 simplify the code here, and reduce the number of branches emitted.
1013 For the negative increment case, the branch here could easily
1014 be merged with the `0' case branch above. For the positive
1015 increment case, it is not clear how this can be simplified. */
1016
1017 if (abs_inc != 1)
1018 {
1019 int cmp_const;
1020 enum rtx_code cmp_code;
1021
1022 if (neg_inc)
1023 {
1024 cmp_const = abs_inc - 1;
1025 cmp_code = LE;
1026 }
1027 else
1028 {
1029 cmp_const = abs_inc * (unroll_number - 1) + 1;
1030 cmp_code = GE;
1031 }
1032
1033 simplify_cmp_and_jump_insns (cmp_code, mode, diff,
1034 GEN_INT (cmp_const), labels[0]);
1035 }
1036
1037 sequence = get_insns ();
1038 end_sequence ();
1039 loop_insn_hoist (loop, sequence);
1040
1041 /* Only the last copy of the loop body here needs the exit
1042 test, so set copy_end to exclude the compare/branch here,
1043 and then reset it inside the loop when get to the last
1044 copy. */
1045
1046 if (GET_CODE (last_loop_insn) == BARRIER)
1047 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1048 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1049 {
1050 copy_end = PREV_INSN (last_loop_insn);
1051 #ifdef HAVE_cc0
1052 /* The immediately preceding insn may be a compare which
1053 we do not want to copy. */
1054 if (sets_cc0_p (PREV_INSN (copy_end)))
1055 copy_end = PREV_INSN (copy_end);
1056 #endif
1057 }
1058 else
1059 abort ();
1060
1061 for (i = 1; i < unroll_number; i++)
1062 {
1063 emit_label_after (labels[unroll_number - i],
1064 PREV_INSN (loop_start));
1065
1066 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1067 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0),
1068 0, (VARRAY_SIZE (map->const_equiv_varray)
1069 * sizeof (struct const_equiv_data)));
1070 map->const_age = 0;
1071
1072 for (j = 0; j < max_labelno; j++)
1073 if (local_label[j])
1074 set_label_in_map (map, j, gen_label_rtx ());
1075
1076 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1077 if (local_regno[r])
1078 {
1079 map->reg_map[r]
1080 = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1081 record_base_value (REGNO (map->reg_map[r]),
1082 regno_reg_rtx[r], 0);
1083 }
1084 /* The last copy needs the compare/branch insns at the end,
1085 so reset copy_end here if the loop ends with a conditional
1086 branch. */
1087
1088 if (i == unroll_number - 1)
1089 {
1090 if (GET_CODE (last_loop_insn) == BARRIER)
1091 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1092 else
1093 copy_end = last_loop_insn;
1094 }
1095
1096 /* None of the copies are the `last_iteration', so just
1097 pass zero for that parameter. */
1098 copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0,
1099 unroll_type, start_label, loop_end,
1100 loop_start, copy_end);
1101 }
1102 emit_label_after (labels[0], PREV_INSN (loop_start));
1103
1104 if (GET_CODE (last_loop_insn) == BARRIER)
1105 {
1106 insert_before = PREV_INSN (last_loop_insn);
1107 copy_end = PREV_INSN (insert_before);
1108 }
1109 else
1110 {
1111 insert_before = last_loop_insn;
1112 #ifdef HAVE_cc0
1113 /* The instruction immediately before the JUMP_INSN may
1114 be a compare instruction which we do not want to copy
1115 or delete. */
1116 if (sets_cc0_p (PREV_INSN (insert_before)))
1117 insert_before = PREV_INSN (insert_before);
1118 #endif
1119 copy_end = PREV_INSN (insert_before);
1120 }
1121
1122 /* Set unroll type to MODULO now. */
1123 unroll_type = UNROLL_MODULO;
1124 loop_preconditioned = 1;
1125
1126 /* Preconditioning changes the loop's initial value. We set
1127 it to an unknown value so that doloop_optimize won't get
1128 confused. */
1129 loop_info->initial_value = 0;
1130
1131 /* Clean up. */
1132 free (labels);
1133 }
1134 }
1135
1136 /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1137 the loop unless all loops are being unrolled. */
1138 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1139 {
1140 if (loop_dump_stream)
1141 fprintf (loop_dump_stream,
1142 "Unrolling failure: Naive unrolling not being done.\n");
1143 goto egress;
1144 }
1145
1146 /* At this point, we are guaranteed to unroll the loop. */
1147
1148 /* Keep track of the unroll factor for the loop. */
1149 loop_info->unroll_number = unroll_number;
1150
1151 /* And whether the loop has been preconditioned. */
1152 loop_info->preconditioned = loop_preconditioned;
1153
1154 /* Remember whether it was preconditioned for the second loop pass. */
1155 NOTE_PRECONDITIONED (loop->end) = loop_preconditioned;
1156
1157 /* For each biv and giv, determine whether it can be safely split into
1158 a different variable for each unrolled copy of the loop body.
1159 We precalculate and save this info here, since computing it is
1160 expensive.
1161
1162 Do this before deleting any instructions from the loop, so that
1163 back_branch_in_range_p will work correctly. */
1164
1165 if (splitting_not_safe)
1166 temp = 0;
1167 else
1168 temp = find_splittable_regs (loop, unroll_type, unroll_number);
1169
1170 /* find_splittable_regs may have created some new registers, so must
1171 reallocate the reg_map with the new larger size, and must realloc
1172 the constant maps also. */
1173
1174 maxregnum = max_reg_num ();
1175 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
1176
1177 init_reg_map (map, maxregnum);
1178
1179 if (map->const_equiv_varray == 0)
1180 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray,
1181 maxregnum + temp * unroll_number * 2,
1182 "unroll_loop");
1183 global_const_equiv_varray = map->const_equiv_varray;
1184
1185 /* Search the list of bivs and givs to find ones which need to be remapped
1186 when split, and set their reg_map entry appropriately. */
1187
1188 for (bl = ivs->list; bl; bl = bl->next)
1189 {
1190 if (REGNO (bl->biv->src_reg) != bl->regno)
1191 map->reg_map[bl->regno] = bl->biv->src_reg;
1192 #if 0
1193 /* Currently, non-reduced/final-value givs are never split. */
1194 for (v = bl->giv; v; v = v->next_iv)
1195 if (REGNO (v->src_reg) != bl->regno)
1196 map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1197 #endif
1198 }
1199
1200 /* Use our current register alignment and pointer flags. */
1201 map->regno_pointer_align = cfun->emit->regno_pointer_align;
1202 map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx;
1203
1204 /* If the loop is being partially unrolled, and the iteration variables
1205 are being split, and are being renamed for the split, then must fix up
1206 the compare/jump instruction at the end of the loop to refer to the new
1207 registers. This compare isn't copied, so the registers used in it
1208 will never be replaced if it isn't done here. */
1209
1210 if (unroll_type == UNROLL_MODULO)
1211 {
1212 insn = NEXT_INSN (copy_end);
1213 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1214 PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn));
1215 }
1216
1217 /* For unroll_number times, make a copy of each instruction
1218 between copy_start and copy_end, and insert these new instructions
1219 before the end of the loop. */
1220
1221 for (i = 0; i < unroll_number; i++)
1222 {
1223 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1224 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0,
1225 VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data));
1226 map->const_age = 0;
1227
1228 for (j = 0; j < max_labelno; j++)
1229 if (local_label[j])
1230 set_label_in_map (map, j, gen_label_rtx ());
1231
1232 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1233 if (local_regno[r])
1234 {
1235 map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1236 record_base_value (REGNO (map->reg_map[r]),
1237 regno_reg_rtx[r], 0);
1238 }
1239
1240 /* If loop starts with a branch to the test, then fix it so that
1241 it points to the test of the first unrolled copy of the loop. */
1242 if (i == 0 && loop_start != copy_start)
1243 {
1244 insn = PREV_INSN (copy_start);
1245 pattern = PATTERN (insn);
1246
1247 tem = get_label_from_map (map,
1248 CODE_LABEL_NUMBER
1249 (XEXP (SET_SRC (pattern), 0)));
1250 SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem);
1251
1252 /* Set the jump label so that it can be used by later loop unrolling
1253 passes. */
1254 JUMP_LABEL (insn) = tem;
1255 LABEL_NUSES (tem)++;
1256 }
1257
1258 copy_loop_body (loop, copy_start, copy_end, map, exit_label,
1259 i == unroll_number - 1, unroll_type, start_label,
1260 loop_end, insert_before, insert_before);
1261 }
1262
1263 /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1264 insn to be deleted. This prevents any runaway delete_insn call from
1265 more insns that it should, as it always stops at a CODE_LABEL. */
1266
1267 /* Delete the compare and branch at the end of the loop if completely
1268 unrolling the loop. Deleting the backward branch at the end also
1269 deletes the code label at the start of the loop. This is done at
1270 the very end to avoid problems with back_branch_in_range_p. */
1271
1272 if (unroll_type == UNROLL_COMPLETELY)
1273 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1274 else
1275 safety_label = emit_label_after (gen_label_rtx (), copy_end);
1276
1277 /* Delete all of the original loop instructions. Don't delete the
1278 LOOP_BEG note, or the first code label in the loop. */
1279
1280 insn = NEXT_INSN (copy_start);
1281 while (insn != safety_label)
1282 {
1283 /* ??? Don't delete named code labels. They will be deleted when the
1284 jump that references them is deleted. Otherwise, we end up deleting
1285 them twice, which causes them to completely disappear instead of turn
1286 into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in
1287 dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files
1288 to handle deleted labels instead. Or perhaps fix DECL_RTL of the
1289 associated LABEL_DECL to point to one of the new label instances. */
1290 /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */
1291 if (insn != start_label
1292 && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn))
1293 && ! (GET_CODE (insn) == NOTE
1294 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))
1295 insn = delete_related_insns (insn);
1296 else
1297 insn = NEXT_INSN (insn);
1298 }
1299
1300 /* Can now delete the 'safety' label emitted to protect us from runaway
1301 delete_related_insns calls. */
1302 if (INSN_DELETED_P (safety_label))
1303 abort ();
1304 delete_related_insns (safety_label);
1305
1306 /* If exit_label exists, emit it after the loop. Doing the emit here
1307 forces it to have a higher INSN_UID than any insn in the unrolled loop.
1308 This is needed so that mostly_true_jump in reorg.c will treat jumps
1309 to this loop end label correctly, i.e. predict that they are usually
1310 not taken. */
1311 if (exit_label)
1312 emit_label_after (exit_label, loop_end);
1313
1314 egress:
1315 if (unroll_type == UNROLL_COMPLETELY)
1316 {
1317 /* Remove the loop notes since this is no longer a loop. */
1318 if (loop->vtop)
1319 delete_related_insns (loop->vtop);
1320 if (loop->cont)
1321 delete_related_insns (loop->cont);
1322 if (loop_start)
1323 delete_related_insns (loop_start);
1324 if (loop_end)
1325 delete_related_insns (loop_end);
1326 }
1327
1328 if (map->const_equiv_varray)
1329 VARRAY_FREE (map->const_equiv_varray);
1330 if (map->label_map)
1331 {
1332 free (map->label_map);
1333 free (local_label);
1334 }
1335 free (map->insn_map);
1336 free (splittable_regs);
1337 free (splittable_regs_updates);
1338 free (addr_combined_regs);
1339 free (local_regno);
1340 if (map->reg_map)
1341 free (map->reg_map);
1342 free (map);
1343 }
1344
1345 /* A helper function for unroll_loop. Emit a compare and branch to
1346 satisfy (CMP OP1 OP2), but pass this through the simplifier first.
1347 If the branch turned out to be conditional, return it, otherwise
1348 return NULL. */
1349
1350 static rtx
simplify_cmp_and_jump_insns(code,mode,op0,op1,label)1351 simplify_cmp_and_jump_insns (code, mode, op0, op1, label)
1352 enum rtx_code code;
1353 enum machine_mode mode;
1354 rtx op0, op1, label;
1355 {
1356 rtx t, insn;
1357
1358 t = simplify_relational_operation (code, mode, op0, op1);
1359 if (!t)
1360 {
1361 enum rtx_code scode = signed_condition (code);
1362 emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode,
1363 code != scode, label);
1364 insn = get_last_insn ();
1365
1366 JUMP_LABEL (insn) = label;
1367 LABEL_NUSES (label) += 1;
1368
1369 return insn;
1370 }
1371 else if (t == const_true_rtx)
1372 {
1373 insn = emit_jump_insn (gen_jump (label));
1374 emit_barrier ();
1375 JUMP_LABEL (insn) = label;
1376 LABEL_NUSES (label) += 1;
1377 }
1378
1379 return NULL_RTX;
1380 }
1381
1382 /* Return true if the loop can be safely, and profitably, preconditioned
1383 so that the unrolled copies of the loop body don't need exit tests.
1384
1385 This only works if final_value, initial_value and increment can be
1386 determined, and if increment is a constant power of 2.
1387 If increment is not a power of 2, then the preconditioning modulo
1388 operation would require a real modulo instead of a boolean AND, and this
1389 is not considered `profitable'. */
1390
1391 /* ??? If the loop is known to be executed very many times, or the machine
1392 has a very cheap divide instruction, then preconditioning is a win even
1393 when the increment is not a power of 2. Use RTX_COST to compute
1394 whether divide is cheap.
1395 ??? A divide by constant doesn't actually need a divide, look at
1396 expand_divmod. The reduced cost of this optimized modulo is not
1397 reflected in RTX_COST. */
1398
int
precondition_loop_p (loop, initial_value, final_value, increment, mode)
     const struct loop *loop;
     rtx *initial_value, *final_value, *increment;
     enum machine_mode *mode;
{
  rtx loop_start = loop->start;
  struct loop_info *loop_info = LOOP_INFO (loop);

  /* If the iteration count is a compile-time constant, synthesize a
     canonical iteration space (0..n with step 1, or n..0 with step -1)
     in word_mode and report success immediately.  */
  if (loop_info->n_iterations > 0)
    {
      if (INTVAL (loop_info->increment) > 0)
	{
	  *initial_value = const0_rtx;
	  *increment = const1_rtx;
	  *final_value = GEN_INT (loop_info->n_iterations);
	}
      else
	{
	  *initial_value = GEN_INT (loop_info->n_iterations);
	  *increment = constm1_rtx;
	  *final_value = const0_rtx;
	}
      *mode = word_mode;

      if (loop_dump_stream)
	{
	  fputs ("Preconditioning: Success, number of iterations known, ",
		 loop_dump_stream);
	  fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
		   loop_info->n_iterations);
	  fputs (".\n", loop_dump_stream);
	}
      return 1;
    }

  /* Otherwise, loop analysis must have identified the iteration
     variable, its initial value, and its increment.  */
  if (loop_info->iteration_var == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find iteration variable.\n");
      return 0;
    }
  else if (loop_info->initial_value == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find initial value.\n");
      return 0;
    }
  else if (loop_info->increment == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Could not find increment value.\n");
      return 0;
    }
  else if (GET_CODE (loop_info->increment) != CONST_INT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Increment not a constant.\n");
      return 0;
    }
  /* The preconditioning modulo is computed with an AND, which only
     works when the (absolute) increment is a power of 2.  */
  else if ((exact_log2 (INTVAL (loop_info->increment)) < 0)
	   && (exact_log2 (-INTVAL (loop_info->increment)) < 0))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Increment not a constant power of 2.\n");
      return 0;
    }

  /* Unsigned_compare and compare_dir can be ignored here, since they do
     not matter for preconditioning.  */

  /* A zero final_value means the exit test was an EQ/NE comparison with
     no computable endpoint.  */
  if (loop_info->final_value == 0)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: EQ comparison loop.\n");
      return 0;
    }

  /* Must ensure that final_value is invariant, so call
     loop_invariant_p to check.  Before doing so, must check regno
     against max_reg_before_loop to make sure that the register is in
     the range covered by loop_invariant_p.  If it isn't, then it is
     most likely a biv/giv which by definition are not invariant.  */
  if ((GET_CODE (loop_info->final_value) == REG
       && REGNO (loop_info->final_value) >= max_reg_before_loop)
      || (GET_CODE (loop_info->final_value) == PLUS
	  && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop)
      || ! loop_invariant_p (loop, loop_info->final_value))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Final value not invariant.\n");
      return 0;
    }

  /* Fail for floating point values, since the caller of this function
     does not have code to deal with them.  */
  if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT
      || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Floating point final or initial value.\n");
      return 0;
    }

  /* Fail if loop_info->iteration_var is not live before loop_start,
     since we need to test its value in the preconditioning code.  */

  if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var))
      > INSN_LUID (loop_start))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Preconditioning: Iteration var not live before loop start.\n");
      return 0;
    }

  /* Note that loop_iterations biases the initial value for GIV iterators
     such as "while (i-- > 0)" so that we can calculate the number of
     iterations just like for BIV iterators.

     Also note that the absolute values of initial_value and
     final_value are unimportant as only their difference is used for
     calculating the number of loop iterations.  */
  *initial_value = loop_info->initial_value;
  *increment = loop_info->increment;
  *final_value = loop_info->final_value;

  /* Decide what mode to do these calculations in.  Choose the larger
     of final_value's mode and initial_value's mode, or a full-word if
     both are constants.  */
  *mode = GET_MODE (*final_value);
  if (*mode == VOIDmode)
    {
      /* final_value is a VOIDmode constant; fall back to the initial
	 value's mode, then to word_mode if that is constant too.  */
      *mode = GET_MODE (*initial_value);
      if (*mode == VOIDmode)
	*mode = word_mode;
    }
  else if (*mode != GET_MODE (*initial_value)
	   && (GET_MODE_SIZE (*mode)
	       < GET_MODE_SIZE (GET_MODE (*initial_value))))
    *mode = GET_MODE (*initial_value);

  /* Success!  */
  if (loop_dump_stream)
    fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
  return 1;
}
1554
/* All pseudo-registers must be mapped to themselves.  Two hard registers
   must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
   REGNUM, to avoid function-inlining specific conversions of these
   registers.  All other hard regs cannot be mapped because they may be
   used with different modes.  */
1561
1562 static void
init_reg_map(map,maxregnum)1563 init_reg_map (map, maxregnum)
1564 struct inline_remap *map;
1565 int maxregnum;
1566 {
1567 int i;
1568
1569 for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1570 map->reg_map[i] = regno_reg_rtx[i];
1571 /* Just clear the rest of the entries. */
1572 for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1573 map->reg_map[i] = 0;
1574
1575 map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1576 = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1577 map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1578 = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1579 }
1580
1581 /* Strength-reduction will often emit code for optimized biv/givs which
1582 calculates their value in a temporary register, and then copies the result
1583 to the iv. This procedure reconstructs the pattern computing the iv;
1584 verifying that all operands are of the proper form.
1585
1586 PATTERN must be the result of single_set.
1587 The return value is the amount that the giv is incremented by. */
1588
static rtx
calculate_giv_inc (pattern, src_insn, regno)
     rtx pattern, src_insn;
     unsigned int regno;
{
  rtx increment;
  /* Running sum of the increments found; the loop may retry once and
     accumulate a second add (see the `tries' logic below).  */
  rtx increment_total = 0;
  int tries = 0;

 retry:
  /* Verify that we have an increment insn here.  First check for a plus
     as the set source.  */
  if (GET_CODE (SET_SRC (pattern)) != PLUS)
    {
      /* SR sometimes computes the new giv value in a temp, then copies it
	 to the new_reg.  */
      src_insn = PREV_INSN (src_insn);
      pattern = single_set (src_insn);
      if (GET_CODE (SET_SRC (pattern)) != PLUS)
	abort ();

      /* The last insn emitted is not needed, so delete it to avoid confusing
	 the second cse pass.  This insn sets the giv unnecessarily.  */
      delete_related_insns (get_last_insn ());
    }

  /* Verify that we have a constant as the second operand of the plus.  */
  increment = XEXP (SET_SRC (pattern), 1);
  if (GET_CODE (increment) != CONST_INT)
    {
      /* SR sometimes puts the constant in a register, especially if it is
	 too big to be an add immed operand.  */
      increment = find_last_value (increment, &src_insn, NULL_RTX, 0);

      /* SR may have used LO_SUM to compute the constant if it is too large
	 for a load immed operand.  In this case, the constant is in operand
	 one of the LO_SUM rtx.  */
      if (GET_CODE (increment) == LO_SUM)
	increment = XEXP (increment, 1);

      /* Some ports store large constants in memory and add a REG_EQUAL
	 note to the store insn.  */
      else if (GET_CODE (increment) == MEM)
	{
	  rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
	  if (note)
	    increment = XEXP (note, 0);
	}

      else if (GET_CODE (increment) == IOR
	       || GET_CODE (increment) == PLUS
	       || GET_CODE (increment) == ASHIFT
	       || GET_CODE (increment) == LSHIFTRT)
	{
	  /* The rs6000 port loads some constants with IOR.
	     The alpha port loads some constants with ASHIFT and PLUS.
	     The sparc64 port loads some constants with LSHIFTRT.
	     Recombine the two halves into a single CONST_INT.  */
	  rtx second_part = XEXP (increment, 1);
	  enum rtx_code code = GET_CODE (increment);

	  increment = find_last_value (XEXP (increment, 0),
				       &src_insn, NULL_RTX, 0);
	  /* Don't need the last insn anymore.  */
	  delete_related_insns (get_last_insn ());

	  if (GET_CODE (second_part) != CONST_INT
	      || GET_CODE (increment) != CONST_INT)
	    abort ();

	  if (code == IOR)
	    increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
	  else if (code == PLUS)
	    increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
	  else if (code == ASHIFT)
	    increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
	  else
	    increment = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (increment) >> INTVAL (second_part));
	}

      /* Whatever the reconstruction, we must end up with a constant.  */
      if (GET_CODE (increment) != CONST_INT)
	abort ();

      /* The insn loading the constant into a register is no longer needed,
	 so delete it.  */
      delete_related_insns (get_last_insn ());
    }

  /* Fold this increment into the running total (non-zero only on the
     second pass through, after a retry).  */
  if (increment_total)
    increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
  else
    increment_total = increment;

  /* Check that the source register is the same as the register we expected
     to see as the source.  If not, something is seriously wrong.  */
  if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
      || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
    {
      /* Some machines (e.g. the romp), may emit two add instructions for
	 certain constants, so lets try looking for another add immediately
	 before this one if we have only seen one add insn so far.  */

      if (tries == 0)
	{
	  tries++;

	  src_insn = PREV_INSN (src_insn);
	  pattern = single_set (src_insn);

	  delete_related_insns (get_last_insn ());

	  goto retry;
	}

      abort ();
    }

  return increment_total;
}
1707
1708 /* Copy REG_NOTES, except for insn references, because not all insn_map
1709 entries are valid yet. We do need to copy registers now though, because
1710 the reg_map entries can change during copying. */
1711
1712 static rtx
initial_reg_note_copy(notes,map)1713 initial_reg_note_copy (notes, map)
1714 rtx notes;
1715 struct inline_remap *map;
1716 {
1717 rtx copy;
1718
1719 if (notes == 0)
1720 return 0;
1721
1722 copy = rtx_alloc (GET_CODE (notes));
1723 PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes));
1724
1725 if (GET_CODE (notes) == EXPR_LIST)
1726 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0);
1727 else if (GET_CODE (notes) == INSN_LIST)
1728 /* Don't substitute for these yet. */
1729 XEXP (copy, 0) = copy_rtx (XEXP (notes, 0));
1730 else
1731 abort ();
1732
1733 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1734
1735 return copy;
1736 }
1737
1738 /* Fixup insn references in copied REG_NOTES. */
1739
static void
final_reg_note_copy (notesp, map)
     rtx *notesp;
     struct inline_remap *map;
{
  /* NOTESP always points at the link through which the current note is
     reached, so notes can be unlinked in place.  */
  while (*notesp)
    {
      rtx note = *notesp;

      if (GET_CODE (note) == INSN_LIST)
	{
	  /* Sometimes, we have a REG_WAS_0 note that points to a
	     deleted instruction.  In that case, we can just delete the
	     note.  */
	  if (REG_NOTE_KIND (note) == REG_WAS_0)
	    {
	      /* Unlink this note; do not advance NOTESP, since the
		 following note now occupies this slot.  */
	      *notesp = XEXP (note, 1);
	      continue;
	    }
	  else
	    {
	      rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))];

	      /* If we failed to remap the note, something is awry.
		 Allow REG_LABEL as it may reference label outside
		 the unrolled loop.  */
	      if (!insn)
		{
		  if (REG_NOTE_KIND (note) != REG_LABEL)
		    abort ();
		}
	      else
		XEXP (note, 0) = insn;
	    }
	}

      notesp = &XEXP (note, 1);
    }
}
1779
1780 /* Copy each instruction in the loop, substituting from map as appropriate.
1781 This is very similar to a loop in expand_inline_function. */
1782
static void
copy_loop_body (loop, copy_start, copy_end, map, exit_label, last_iteration,
		unroll_type, start_label, loop_end, insert_before,
		copy_notes_from)
     struct loop *loop;
     rtx copy_start, copy_end;
     struct inline_remap *map;
     rtx exit_label;
     int last_iteration;
     enum unroll_types unroll_type;
     rtx start_label, loop_end, insert_before, copy_notes_from;
{
  /* Emit, before INSERT_BEFORE, a copy of the insns strictly between
     COPY_START and COPY_END (inclusive of COPY_END), substituting
     registers/labels via MAP and splitting induction variables as
     recorded in splittable_regs.  LAST_ITERATION nonzero means this is
     the final copy, so iv values must land back in their original
     registers and references to START_LABEL map to itself rather than
     to a fresh FINAL_LABEL.  */
  struct loop_ivs *ivs = LOOP_IVS (loop);
  rtx insn, pattern;
  rtx set, tem, copy = NULL_RTX;
  int dest_reg_was_split, i;
#ifdef HAVE_cc0
  rtx cc0_insn = 0;
#endif
  rtx final_label = 0;
  rtx giv_inc, giv_dest_reg, giv_src_reg;

  /* If this isn't the last iteration, then map any references to the
     start_label to final_label.  Final label will then be emitted immediately
     after the end of this loop body if it was ever used.

     If this is the last iteration, then map references to the start_label
     to itself.  */
  if (! last_iteration)
    {
      final_label = gen_label_rtx ();
      set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label);
    }
  else
    set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);

  start_sequence ();

  insn = copy_start;
  do
    {
      insn = NEXT_INSN (insn);

      map->orig_asm_operands_vector = 0;

      switch (GET_CODE (insn))
	{
	case INSN:
	  pattern = PATTERN (insn);
	  copy = 0;
	  giv_inc = 0;

	  /* Check to see if this is a giv that has been combined with
	     some split address givs.  (Combined in the sense that
	     `combine_givs' in loop.c has put two givs in the same register.)
	     In this case, we must search all givs based on the same biv to
	     find the address givs.  Then split the address givs.
	     Do this before splitting the giv, since that may map the
	     SET_DEST to a new register.  */

	  if ((set = single_set (insn))
	      && GET_CODE (SET_DEST (set)) == REG
	      && addr_combined_regs[REGNO (SET_DEST (set))])
	    {
	      struct iv_class *bl;
	      struct induction *v, *tv;
	      unsigned int regno = REGNO (SET_DEST (set));

	      v = addr_combined_regs[REGNO (SET_DEST (set))];
	      bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

	      /* Although the giv_inc amount is not needed here, we must call
		 calculate_giv_inc here since it might try to delete the
		 last insn emitted.  If we wait until later to call it,
		 we might accidentally delete insns generated immediately
		 below by emit_unrolled_add.  */

	      giv_inc = calculate_giv_inc (set, insn, regno);

	      /* Now find all address giv's that were combined with this
		 giv 'v'.  */
	      for (tv = bl->giv; tv; tv = tv->next_iv)
		if (tv->giv_type == DEST_ADDR && tv->same == v)
		  {
		    int this_giv_inc;

		    /* If this DEST_ADDR giv was not split, then ignore it.  */
		    if (*tv->location != tv->dest_reg)
		      continue;

		    /* Scale this_giv_inc if the multiplicative factors of
		       the two givs are different.  */
		    this_giv_inc = INTVAL (giv_inc);
		    if (tv->mult_val != v->mult_val)
		      this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
				      * INTVAL (tv->mult_val));

		    tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
		    *tv->location = tv->dest_reg;

		    if (last_iteration && unroll_type != UNROLL_COMPLETELY)
		      {
			/* Must emit an insn to increment the split address
			   giv.  Add in the const_adjust field in case there
			   was a constant eliminated from the address.  */
			rtx value, dest_reg;

			/* tv->dest_reg will be either a bare register,
			   or else a register plus a constant.  */
			if (GET_CODE (tv->dest_reg) == REG)
			  dest_reg = tv->dest_reg;
			else
			  dest_reg = XEXP (tv->dest_reg, 0);

			/* Check for shared address givs, and avoid
			   incrementing the shared pseudo reg more than
			   once.  */
			if (! tv->same_insn && ! tv->shared)
			  {
			    /* tv->dest_reg may actually be a (PLUS (REG)
			       (CONST)) here, so we must call plus_constant
			       to add the const_adjust amount before calling
			       emit_unrolled_add below.  */
			    value = plus_constant (tv->dest_reg,
						   tv->const_adjust);

			    if (GET_CODE (value) == PLUS)
			      {
				/* The constant could be too large for an add
				   immediate, so can't directly emit an insn
				   here.  */
				emit_unrolled_add (dest_reg, XEXP (value, 0),
						   XEXP (value, 1));
			      }
			  }

			/* Reset the giv to be just the register again, in case
			   it is used after the set we have just emitted.
			   We must subtract the const_adjust factor added in
			   above.  */
			tv->dest_reg = plus_constant (dest_reg,
						      -tv->const_adjust);
			*tv->location = tv->dest_reg;
		      }
		  }
	    }

	  /* If this is a setting of a splittable variable, then determine
	     how to split the variable, create a new set based on this split,
	     and set up the reg_map so that later uses of the variable will
	     use the new split variable.  */

	  dest_reg_was_split = 0;

	  if ((set = single_set (insn))
	      && GET_CODE (SET_DEST (set)) == REG
	      && splittable_regs[REGNO (SET_DEST (set))])
	    {
	      unsigned int regno = REGNO (SET_DEST (set));
	      unsigned int src_regno;

	      dest_reg_was_split = 1;

	      giv_dest_reg = SET_DEST (set);
	      giv_src_reg = giv_dest_reg;
	      /* Compute the increment value for the giv, if it wasn't
		 already computed above.  */
	      if (giv_inc == 0)
		giv_inc = calculate_giv_inc (set, insn, regno);

	      src_regno = REGNO (giv_src_reg);

	      if (unroll_type == UNROLL_COMPLETELY)
		{
		  /* Completely unrolling the loop.  Set the induction
		     variable to a known constant value.  */

		  /* The value in splittable_regs may be an invariant
		     value, so we must use plus_constant here.  */
		  splittable_regs[regno]
		    = plus_constant (splittable_regs[src_regno],
				     INTVAL (giv_inc));

		  if (GET_CODE (splittable_regs[regno]) == PLUS)
		    {
		      giv_src_reg = XEXP (splittable_regs[regno], 0);
		      giv_inc = XEXP (splittable_regs[regno], 1);
		    }
		  else
		    {
		      /* The splittable_regs value must be a REG or a
			 CONST_INT, so put the entire value in the giv_src_reg
			 variable.  */
		      giv_src_reg = splittable_regs[regno];
		      giv_inc = const0_rtx;
		    }
		}
	      else
		{
		  /* Partially unrolling loop.  Create a new pseudo
		     register for the iteration variable, and set it to
		     be a constant plus the original register.  Except
		     on the last iteration, when the result has to
		     go back into the original iteration var register.  */

		  /* Handle bivs which must be mapped to a new register
		     when split.  This happens for bivs which need their
		     final value set before loop entry.  The new register
		     for the biv was stored in the biv's first struct
		     induction entry by find_splittable_regs.  */

		  if (regno < ivs->n_regs
		      && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT)
		    {
		      giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg;
		      giv_dest_reg = giv_src_reg;
		    }

#if 0
		  /* If non-reduced/final-value givs were split, then
		     this would have to remap those givs also.  See
		     find_splittable_regs.  */
#endif

		  splittable_regs[regno]
		    = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg),
					   giv_inc,
					   splittable_regs[src_regno]);
		  giv_inc = splittable_regs[regno];

		  /* Now split the induction variable by changing the dest
		     of this insn to a new register, and setting its
		     reg_map entry to point to this new register.

		     If this is the last iteration, and this is the last insn
		     that will update the iv, then reuse the original dest,
		     to ensure that the iv will have the proper value when
		     the loop exits or repeats.

		     Using splittable_regs_updates here like this is safe,
		     because it can only be greater than one if all
		     instructions modifying the iv are always executed in
		     order.  */

		  if (! last_iteration
		      || (splittable_regs_updates[regno]-- != 1))
		    {
		      tem = gen_reg_rtx (GET_MODE (giv_src_reg));
		      giv_dest_reg = tem;
		      map->reg_map[regno] = tem;
		      record_base_value (REGNO (tem),
					 giv_inc == const0_rtx
					 ? giv_src_reg
					 : gen_rtx_PLUS (GET_MODE (giv_src_reg),
							 giv_src_reg, giv_inc),
					 1);
		    }
		  else
		    map->reg_map[regno] = giv_src_reg;
		}

	      /* The constant being added could be too large for an add
		 immediate, so can't directly emit an insn here.  */
	      emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
	      copy = get_last_insn ();
	      pattern = PATTERN (copy);
	    }
	  else
	    {
	      pattern = copy_rtx_and_substitute (pattern, map, 0);
	      copy = emit_insn (pattern);
	    }
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);

	  /* If there is a REG_EQUAL note present whose value
	     is not loop invariant, then delete it, since it
	     may cause problems with later optimization passes.  */
	  if ((tem = find_reg_note (copy, REG_EQUAL, NULL_RTX))
	      && !loop_invariant_p (loop, XEXP (tem, 0)))
	    remove_note (copy, tem);

#ifdef HAVE_cc0
	  /* If this insn is setting CC0, it may need to look at
	     the insn that uses CC0 to see what type of insn it is.
	     In that case, the call to recog via validate_change will
	     fail.  So don't substitute constants here.  Instead,
	     do it when we emit the following insn.

	     For example, see the pyr.md file.  That machine has signed and
	     unsigned compares.  The compare patterns must check the
	     following branch insn to see what kind of compare to
	     emit.

	     If the previous insn set CC0, substitute constants on it as
	     well.  */
	  if (sets_cc0_p (PATTERN (copy)) != 0)
	    cc0_insn = copy;
	  else
	    {
	      if (cc0_insn)
		try_constants (cc0_insn, map);
	      cc0_insn = 0;
	      try_constants (copy, map);
	    }
#else
	  try_constants (copy, map);
#endif

	  /* Make split induction variable constants `permanent' since we
	     know there are no backward branches across iteration variable
	     settings which would invalidate this.  */
	  if (dest_reg_was_split)
	    {
	      int regno = REGNO (SET_DEST (set));

	      if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray)
		  && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age
		      == map->const_age))
		VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1;
	    }
	  break;

	case JUMP_INSN:
	  pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
	  copy = emit_jump_insn (pattern);
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);

	  if (JUMP_LABEL (insn))
	    {
	      JUMP_LABEL (copy) = get_label_from_map (map,
						      CODE_LABEL_NUMBER
						      (JUMP_LABEL (insn)));
	      LABEL_NUSES (JUMP_LABEL (copy))++;
	    }
	  if (JUMP_LABEL (insn) == start_label && insn == copy_end
	      && ! last_iteration)
	    {

	      /* This is a branch to the beginning of the loop; this is the
		 last insn being copied; and this is not the last iteration.
		 In this case, we want to change the original fall through
		 case to be a branch past the end of the loop, and the
		 original jump label case to fall_through.  */

	      if (!invert_jump (copy, exit_label, 0))
		{
		  rtx jmp;
		  rtx lab = gen_label_rtx ();
		  /* Can't do it by reversing the jump (probably because we
		     couldn't reverse the conditions), so emit a new
		     jump_insn after COPY, and redirect the jump around
		     that.  */
		  jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
		  JUMP_LABEL (jmp) = exit_label;
		  LABEL_NUSES (exit_label)++;
		  jmp = emit_barrier_after (jmp);
		  emit_label_after (lab, jmp);
		  LABEL_NUSES (lab) = 0;
		  if (!redirect_jump (copy, lab, 0))
		    abort ();
		}
	    }

#ifdef HAVE_cc0
	  if (cc0_insn)
	    try_constants (cc0_insn, map);
	  cc0_insn = 0;
#endif
	  try_constants (copy, map);

	  /* Set the jump label of COPY correctly to avoid problems with
	     later passes of unroll_loop, if INSN had jump label set.  */
	  if (JUMP_LABEL (insn))
	    {
	      rtx label = 0;

	      /* Can't use the label_map for every insn, since this may be
		 the backward branch, and hence the label was not mapped.  */
	      if ((set = single_set (copy)))
		{
		  tem = SET_SRC (set);
		  if (GET_CODE (tem) == LABEL_REF)
		    label = XEXP (tem, 0);
		  else if (GET_CODE (tem) == IF_THEN_ELSE)
		    {
		      /* Conditional jump: the label is in whichever arm
			 is not the fall-through (pc_rtx).  */
		      if (XEXP (tem, 1) != pc_rtx)
			label = XEXP (XEXP (tem, 1), 0);
		      else
			label = XEXP (XEXP (tem, 2), 0);
		    }
		}

	      if (label && GET_CODE (label) == CODE_LABEL)
		JUMP_LABEL (copy) = label;
	      else
		{
		  /* An unrecognizable jump insn, probably the entry jump
		     for a switch statement.  This label must have been mapped,
		     so just use the label_map to get the new jump label.  */
		  JUMP_LABEL (copy)
		    = get_label_from_map (map,
					  CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
		}

	      /* If this is a non-local jump, then must increase the label
		 use count so that the label will not be deleted when the
		 original jump is deleted.  */
	      LABEL_NUSES (JUMP_LABEL (copy))++;
	    }
	  else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
		   || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
	    {
	      /* Jump table: bump the use count of every label it holds.  */
	      rtx pat = PATTERN (copy);
	      int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
	      int len = XVECLEN (pat, diff_vec_p);
	      int i;

	      for (i = 0; i < len; i++)
		LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
	    }

	  /* If this used to be a conditional jump insn but whose branch
	     direction is now known, we must do something special.  */
	  if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value)
	    {
#ifdef HAVE_cc0
	      /* If the previous insn set cc0 for us, delete it.  */
	      if (only_sets_cc0_p (PREV_INSN (copy)))
		delete_related_insns (PREV_INSN (copy));
#endif

	      /* If this is now a no-op, delete it.  */
	      if (map->last_pc_value == pc_rtx)
		{
		  delete_insn (copy);
		  copy = 0;
		}
	      else
		/* Otherwise, this is unconditional jump so we must put a
		   BARRIER after it.  We could do some dead code elimination
		   here, but jump.c will do it just as well.  */
		emit_barrier ();
	    }
	  break;

	case CALL_INSN:
	  pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
	  copy = emit_call_insn (pattern);
	  REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
	  INSN_SCOPE (copy) = INSN_SCOPE (insn);
	  SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn);
	  CONST_OR_PURE_CALL_P (copy) = CONST_OR_PURE_CALL_P (insn);

	  /* Because the USAGE information potentially contains objects other
	     than hard registers, we need to copy it.  */
	  CALL_INSN_FUNCTION_USAGE (copy)
	    = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn),
				       map, 0);

#ifdef HAVE_cc0
	  if (cc0_insn)
	    try_constants (cc0_insn, map);
	  cc0_insn = 0;
#endif
	  try_constants (copy, map);

	  /* Be lazy and assume CALL_INSNs clobber all hard registers.  */
	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	    VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0;
	  break;

	case CODE_LABEL:
	  /* If this is the loop start label, then we don't need to emit a
	     copy of this label since no one will use it.  */

	  if (insn != start_label)
	    {
	      copy = emit_label (get_label_from_map (map,
						     CODE_LABEL_NUMBER (insn)));
	      map->const_age++;
	    }
	  break;

	case BARRIER:
	  copy = emit_barrier ();
	  break;

	case NOTE:
	  /* VTOP and CONT notes are valid only before the loop exit test.
	     If placed anywhere else, loop may generate bad code.  */
	  /* BASIC_BLOCK notes exist to stabilize basic block structures with
	     the associated rtl.  We do not want to share the structure in
	     this new block.  */

	  if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
	      && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
		   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
		  || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
	    copy = emit_note (NOTE_SOURCE_FILE (insn),
			      NOTE_LINE_NUMBER (insn));
	  else
	    copy = 0;
	  break;

	default:
	  abort ();
	}

      map->insn_map[INSN_UID (insn)] = copy;
    }
  while (insn != copy_end);

  /* Now finish copying the REG_NOTES.  */
  insn = copy_start;
  do
    {
      insn = NEXT_INSN (insn);
      if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
	   || GET_CODE (insn) == CALL_INSN)
	  && map->insn_map[INSN_UID (insn)])
	final_reg_note_copy (&REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
    }
  while (insn != copy_end);

  /* There may be notes between copy_notes_from and loop_end.  Emit a copy of
     each of these notes here, since there may be some important ones, such as
     NOTE_INSN_BLOCK_END notes, in this group.  We don't do this on the last
     iteration, because the original notes won't be deleted.

     We can't use insert_before here, because when from preconditioning,
     insert_before points before the loop.  We can't use copy_end, because
     there may be insns already inserted after it (which we don't want to
     copy) when not from preconditioning code.  */

  if (! last_iteration)
    {
      for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
	{
	  /* VTOP notes are valid only before the loop exit test.
	     If placed anywhere else, loop may generate bad code.
	     Although COPY_NOTES_FROM will be at most one or two (for cc0)
	     instructions before the last insn in the loop, COPY_NOTES_FROM
	     can be a NOTE_INSN_LOOP_CONT note if there is no VTOP note,
	     as in a do .. while loop.  */
	  if (GET_CODE (insn) == NOTE
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
	      && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
	    emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
	}
    }

  /* Emit the label mapped from START_LABEL if any copied insn used it.  */
  if (final_label && LABEL_NUSES (final_label) > 0)
    emit_label (final_label);

  /* Detach the generated sequence and place it before INSERT_BEFORE.  */
  tem = get_insns ();
  end_sequence ();
  loop_insn_emit_before (loop, 0, insert_before, tem);
}
2347
2348 /* Emit an insn, using the expand_binop to ensure that a valid insn is
2349 emitted. This will correctly handle the case where the increment value
2350 won't fit in the immediate field of a PLUS insns. */
2351
2352 void
emit_unrolled_add(dest_reg,src_reg,increment)2353 emit_unrolled_add (dest_reg, src_reg, increment)
2354 rtx dest_reg, src_reg, increment;
2355 {
2356 rtx result;
2357
2358 result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment,
2359 dest_reg, 0, OPTAB_LIB_WIDEN);
2360
2361 if (dest_reg != result)
2362 emit_move_insn (dest_reg, result);
2363 }
2364
2365 /* Searches the insns between INSN and LOOP->END. Returns 1 if there
2366 is a backward branch in that range that branches to somewhere between
2367 LOOP->START and INSN. Returns 0 otherwise. */
2368
 2369 /* ??? This is a quadratic algorithm.  It could be rewritten to be linear.
 2370    In practice, this is not a problem, because this function is seldom called,
 2371    and uses a negligible amount of CPU time on average.  */
2372
2373 int
back_branch_in_range_p(loop,insn)2374 back_branch_in_range_p (loop, insn)
2375 const struct loop *loop;
2376 rtx insn;
2377 {
2378 rtx p, q, target_insn;
2379 rtx loop_start = loop->start;
2380 rtx loop_end = loop->end;
2381 rtx orig_loop_end = loop->end;
2382
2383 /* Stop before we get to the backward branch at the end of the loop. */
2384 loop_end = prev_nonnote_insn (loop_end);
2385 if (GET_CODE (loop_end) == BARRIER)
2386 loop_end = PREV_INSN (loop_end);
2387
2388 /* Check in case insn has been deleted, search forward for first non
2389 deleted insn following it. */
2390 while (INSN_DELETED_P (insn))
2391 insn = NEXT_INSN (insn);
2392
2393 /* Check for the case where insn is the last insn in the loop. Deal
2394 with the case where INSN was a deleted loop test insn, in which case
2395 it will now be the NOTE_LOOP_END. */
2396 if (insn == loop_end || insn == orig_loop_end)
2397 return 0;
2398
2399 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2400 {
2401 if (GET_CODE (p) == JUMP_INSN)
2402 {
2403 target_insn = JUMP_LABEL (p);
2404
2405 /* Search from loop_start to insn, to see if one of them is
2406 the target_insn. We can't use INSN_LUID comparisons here,
2407 since insn may not have an LUID entry. */
2408 for (q = loop_start; q != insn; q = NEXT_INSN (q))
2409 if (q == target_insn)
2410 return 1;
2411 }
2412 }
2413
2414 return 0;
2415 }
2416
2417 /* Try to generate the simplest rtx for the expression
2418 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2419 value of giv's. */
2420
2421 static rtx
fold_rtx_mult_add(mult1,mult2,add1,mode)2422 fold_rtx_mult_add (mult1, mult2, add1, mode)
2423 rtx mult1, mult2, add1;
2424 enum machine_mode mode;
2425 {
2426 rtx temp, mult_res;
2427 rtx result;
2428
2429 /* The modes must all be the same. This should always be true. For now,
2430 check to make sure. */
2431 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2432 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2433 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2434 abort ();
2435
2436 /* Ensure that if at least one of mult1/mult2 are constant, then mult2
2437 will be a constant. */
2438 if (GET_CODE (mult1) == CONST_INT)
2439 {
2440 temp = mult2;
2441 mult2 = mult1;
2442 mult1 = temp;
2443 }
2444
2445 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2446 if (! mult_res)
2447 mult_res = gen_rtx_MULT (mode, mult1, mult2);
2448
2449 /* Again, put the constant second. */
2450 if (GET_CODE (add1) == CONST_INT)
2451 {
2452 temp = add1;
2453 add1 = mult_res;
2454 mult_res = temp;
2455 }
2456
2457 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2458 if (! result)
2459 result = gen_rtx_PLUS (mode, add1, mult_res);
2460
2461 return result;
2462 }
2463
2464 /* Searches the list of induction struct's for the biv BL, to try to calculate
2465 the total increment value for one iteration of the loop as a constant.
2466
2467 Returns the increment value as an rtx, simplified as much as possible,
2468 if it can be calculated. Otherwise, returns 0. */
2469
2470 rtx
biv_total_increment(bl)2471 biv_total_increment (bl)
2472 const struct iv_class *bl;
2473 {
2474 struct induction *v;
2475 rtx result;
2476
2477 /* For increment, must check every instruction that sets it. Each
2478 instruction must be executed only once each time through the loop.
2479 To verify this, we check that the insn is always executed, and that
2480 there are no backward branches after the insn that branch to before it.
2481 Also, the insn must have a mult_val of one (to make sure it really is
2482 an increment). */
2483
2484 result = const0_rtx;
2485 for (v = bl->biv; v; v = v->next_iv)
2486 {
2487 if (v->always_computable && v->mult_val == const1_rtx
2488 && ! v->maybe_multiple
2489 && SCALAR_INT_MODE_P (v->mode))
2490 {
2491 /* If we have already counted it, skip it. */
2492 if (v->same)
2493 continue;
2494
2495 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2496 }
2497 else
2498 return 0;
2499 }
2500
2501 return result;
2502 }
2503
2504 /* For each biv and giv, determine whether it can be safely split into
2505 a different variable for each unrolled copy of the loop body. If it
2506 is safe to split, then indicate that by saving some useful info
2507 in the splittable_regs array.
2508
2509 If the loop is being completely unrolled, then splittable_regs will hold
2510 the current value of the induction variable while the loop is unrolled.
2511 It must be set to the initial value of the induction variable here.
2512 Otherwise, splittable_regs will hold the difference between the current
2513 value of the induction variable and the value the induction variable had
2514 at the top of the loop. It must be set to the value 0 here.
2515
2516 Returns the total number of instructions that set registers that are
2517 splittable. */
2518
2519 /* ?? If the loop is only unrolled twice, then most of the restrictions to
2520 constant values are unnecessary, since we can easily calculate increment
2521 values in this case even if nothing is constant. The increment value
2522 should not involve a multiply however. */
2523
2524 /* ?? Even if the biv/giv increment values aren't constant, it may still
2525 be beneficial to split the variable if the loop is only unrolled a few
2526 times, since multiplies by small integers (1,2,3,4) are very cheap. */
2527
static int
find_splittable_regs (loop, unroll_type, unroll_number)
     const struct loop *loop;
     enum unroll_types unroll_type;
     int unroll_number;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  struct induction *v;
  rtx increment, tem;
  rtx biv_final_value;
  int biv_splittable;
  int result = 0;

  /* Examine each biv class in turn; givs of a biv are handled by
     find_splittable_givs even when the biv itself is not splittable.  */
  for (bl = ivs->list; bl; bl = bl->next)
    {
      /* Biv_total_increment must return a constant value,
	 otherwise we can not calculate the split values.  */

      increment = biv_total_increment (bl);
      if (! increment || GET_CODE (increment) != CONST_INT)
	continue;

      /* The loop must be unrolled completely, or else have a known number
	 of iterations and only one exit, or else the biv must be dead
	 outside the loop, or else the final value must be known.  Otherwise,
	 it is unsafe to split the biv since it may not have the proper
	 value on loop exit.  */

      /* loop_number_exit_count is nonzero if the loop has an exit other than
	 a fall through at the end.  */

      biv_splittable = 1;
      biv_final_value = 0;
      /* The middle clause checks that the biv is live past the loop end,
	 or that its initializing insn is unusable (missing, outside the
	 region loop analysis covers, after the first use, or dependent on
	 the biv itself); final_biv_value is the last resort.  */
      if (unroll_type != UNROLL_COMPLETELY
	  && (loop->exit_count || unroll_type == UNROLL_NAIVE)
	  && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end)
	      || ! bl->init_insn
	      || INSN_UID (bl->init_insn) >= max_uid_for_loop
	      || (REGNO_FIRST_LUID (bl->regno)
		  < INSN_LUID (bl->init_insn))
	      || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
	  && ! (biv_final_value = final_biv_value (loop, bl)))
	biv_splittable = 0;

      /* If any of the insns setting the BIV don't do so with a simple
	 PLUS, we don't know how to split it.  */
      for (v = bl->biv; biv_splittable && v; v = v->next_iv)
	if ((tem = single_set (v->insn)) == 0
	    || GET_CODE (SET_DEST (tem)) != REG
	    || REGNO (SET_DEST (tem)) != bl->regno
	    || GET_CODE (SET_SRC (tem)) != PLUS)
	  biv_splittable = 0;

      /* If final value is nonzero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */

      /* This biv is splittable.  If completely unrolling the loop, save
	 the biv's initial value.  Otherwise, save the constant zero.  */

      if (biv_splittable == 1)
	{
	  if (unroll_type == UNROLL_COMPLETELY)
	    {
	      /* If the initial value of the biv is itself (i.e. it is too
		 complicated for strength_reduce to compute), or is a hard
		 register, or it isn't invariant, then we must create a new
		 pseudo reg to hold the initial value of the biv.  */

	      if (GET_CODE (bl->initial_value) == REG
		  && (REGNO (bl->initial_value) == bl->regno
		      || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
		      || ! loop_invariant_p (loop, bl->initial_value)))
		{
		  rtx tem = gen_reg_rtx (bl->biv->mode);

		  record_base_value (REGNO (tem), bl->biv->add_val, 0);
		  loop_insn_hoist (loop,
				   gen_move_insn (tem, bl->biv->src_reg));

		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "Biv %d initial value remapped to %d.\n",
			     bl->regno, REGNO (tem));

		  splittable_regs[bl->regno] = tem;
		}
	      else
		splittable_regs[bl->regno] = bl->initial_value;
	    }
	  else
	    splittable_regs[bl->regno] = const0_rtx;

	  /* Save the number of instructions that modify the biv, so that
	     we can treat the last one specially.  */

	  splittable_regs_updates[bl->regno] = bl->biv_count;
	  result += bl->biv_count;

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Biv %d safe to split.\n", bl->regno);
	}

      /* Check every giv that depends on this biv to see whether it is
	 splittable also.  Even if the biv isn't splittable, givs which
	 depend on it may be splittable if the biv is live outside the
	 loop, and the givs aren't.  */

      result += find_splittable_givs (loop, bl, unroll_type, increment,
				      unroll_number);

      /* If final value is nonzero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */
      if (biv_final_value)
	{
	  /* If the loop has multiple exits, emit the insns before the
	     loop to ensure that it will always be executed no matter
	     how the loop exits.  Otherwise emit the insn after the loop,
	     since this is slightly more efficient.  */
	  if (! loop->exit_count)
	    loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg,
						 biv_final_value));
	  else
	    {
	      /* Create a new register to hold the value of the biv, and then
		 set the biv to its final value before the loop start.  The biv
		 is set to its final value before loop start to ensure that
		 this insn will always be executed, no matter how the loop
		 exits.  */
	      rtx tem = gen_reg_rtx (bl->biv->mode);
	      record_base_value (REGNO (tem), bl->biv->add_val, 0);

	      loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
	      loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg,
						    biv_final_value));

	      if (loop_dump_stream)
		fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
			 REGNO (bl->biv->src_reg), REGNO (tem));

	      /* Set up the mapping from the original biv register to the new
		 register.  */
	      bl->biv->src_reg = tem;
	    }
	}
    }
  return result;
}
2681
/* For every giv based on the biv BL, check to determine whether it is
   splittable.  This is a subroutine to find_splittable_regs ().

   LOOP is the loop being unrolled, UNROLL_TYPE says how (completely,
   modulo its iteration count, or naively), and INCREMENT is the biv's
   total increment per iteration.  UNROLL_NUMBER is unused here.

   For each splittable giv, its starting value (the giv's initial value
   when unrolling completely, else zero) is recorded in splittable_regs[]
   and its per-iteration update count in splittable_regs_updates[].

   Return the number of instructions that set splittable registers.  */

static int
find_splittable_givs (loop, bl, unroll_type, increment, unroll_number)
     const struct loop *loop;
     struct iv_class *bl;
     enum unroll_types unroll_type;
     rtx increment;
     int unroll_number ATTRIBUTE_UNUSED;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct induction *v, *v2;
  rtx final_value;
  rtx tem;
  int result = 0;

  /* Scan the list of givs, and set the same_insn field when there are
     multiple identical givs in the same insn.  */
  for (v = bl->giv; v; v = v->next_iv)
    for (v2 = v->next_iv; v2; v2 = v2->next_iv)
      if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
	  && ! v2->same_insn)
	v2->same_insn = v;

  for (v = bl->giv; v; v = v->next_iv)
    {
      rtx giv_inc, value;

      /* Only split the giv if it has already been reduced, or if the loop is
	 being completely unrolled.  */
      if (unroll_type != UNROLL_COMPLETELY && v->ignore)
	continue;

      /* The giv can be split if the insn that sets the giv is executed once
	 and only once on every iteration of the loop.  */
      /* An address giv can always be split.  v->insn is just a use not a set,
	 and hence it does not matter whether it is always executed.  All that
	 matters is that all the biv increments are always executed, and we
	 won't reach here if they aren't.  */
      if (v->giv_type != DEST_ADDR
	  && (! v->always_computable
	      || back_branch_in_range_p (loop, v->insn)))
	continue;

      /* The giv increment value must be a constant.  */
      giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
				   v->mode);
      if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
	continue;

      /* The loop must be unrolled completely, or else have a known number of
	 iterations and only one exit, or else the giv must be dead outside
	 the loop, or else the final value of the giv must be known.
	 Otherwise, it is not safe to split the giv since it may not have the
	 proper value on loop exit.  */

      /* The used outside loop test will fail for DEST_ADDR givs.  They are
	 never used outside the loop anyways, so it is always safe to split a
	 DEST_ADDR giv.  */

      final_value = 0;
      if (unroll_type != UNROLL_COMPLETELY
	  && (loop->exit_count || unroll_type == UNROLL_NAIVE)
	  && v->giv_type != DEST_ADDR
	  /* The next part is true if the pseudo is used outside the loop.
	     We assume that this is true for any pseudo created after loop
	     starts, because we don't have a reg_n_info entry for them.  */
	  && (REGNO (v->dest_reg) >= max_reg_before_loop
	      || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
		  /* Check for the case where the pseudo is set by a shift/add
		     sequence, in which case the first insn setting the pseudo
		     is the first insn of the shift/add sequence.  */
		  && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
		      || (REGNO_FIRST_UID (REGNO (v->dest_reg))
			  != INSN_UID (XEXP (tem, 0)))))
	      /* Line above always fails if INSN was moved by loop opt.  */
	      || (REGNO_LAST_LUID (REGNO (v->dest_reg))
		  >= INSN_LUID (loop->end)))
	  && ! (final_value = v->final_value))
	continue;

#if 0
      /* Currently, non-reduced/final-value givs are never split.  */
      /* Should emit insns after the loop if possible, as the biv final value
	 code below does.  */

      /* If the final value is nonzero, and the giv has not been reduced,
	 then must emit an instruction to set the final value.  */
      if (final_value && !v->new_reg)
	{
	  /* Create a new register to hold the value of the giv, and then set
	     the giv to its final value before the loop start.  The giv is set
	     to its final value before loop start to ensure that this insn
	     will always be executed, no matter how we exit.  */
	  tem = gen_reg_rtx (v->mode);
	  loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg));
	  loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value));

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
		     REGNO (v->dest_reg), REGNO (tem));

	  v->src_reg = tem;
	}
#endif

      /* This giv is splittable.  If completely unrolling the loop, save the
	 giv's initial value.  Otherwise, save the constant zero for it.  */

      if (unroll_type == UNROLL_COMPLETELY)
	{
	  /* It is not safe to use bl->initial_value here, because it may not
	     be invariant.  It is safe to use the initial value stored in
	     the splittable_regs array if it is set.  In rare cases, it won't
	     be set, so then we do exactly the same thing as
	     find_splittable_regs does to get a safe value.  */
	  rtx biv_initial_value;

	  if (splittable_regs[bl->regno])
	    biv_initial_value = splittable_regs[bl->regno];
	  else if (GET_CODE (bl->initial_value) != REG
		   || (REGNO (bl->initial_value) != bl->regno
		       && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
	    biv_initial_value = bl->initial_value;
	  else
	    {
	      /* Copy the biv's starting value into a fresh pseudo before
		 the loop, since the biv itself may be modified inside.  */
	      rtx tem = gen_reg_rtx (bl->biv->mode);

	      record_base_value (REGNO (tem), bl->biv->add_val, 0);
	      loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
	      biv_initial_value = tem;
	    }
	  biv_initial_value = extend_value_for_giv (v, biv_initial_value);
	  /* Giv's initial value is mult_val * biv_initial_value + add_val.  */
	  value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
				     v->add_val, v->mode);
	}
      else
	value = const0_rtx;

      if (v->new_reg)
	{
	  /* If a giv was combined with another giv, then we can only split
	     this giv if the giv it was combined with was reduced.  This
	     is because the value of v->new_reg is meaningless in this
	     case.  */
	  if (v->same && ! v->same->new_reg)
	    {
	      if (loop_dump_stream)
		fprintf (loop_dump_stream,
			 "giv combined with unreduced giv not split.\n");
	      continue;
	    }
	  /* If the giv is an address destination, it could be something other
	     than a simple register, these have to be treated differently.  */
	  else if (v->giv_type == DEST_REG)
	    {
	      /* If value is not a constant, register, or register plus
		 constant, then compute its value into a register before
		 loop start.  This prevents invalid rtx sharing, and should
		 generate better code.  We can use bl->initial_value here
		 instead of splittable_regs[bl->regno] because this code
		 is going before the loop start.  */
	      if (unroll_type == UNROLL_COMPLETELY
		  && GET_CODE (value) != CONST_INT
		  && GET_CODE (value) != REG
		  && (GET_CODE (value) != PLUS
		      || GET_CODE (XEXP (value, 0)) != REG
		      || GET_CODE (XEXP (value, 1)) != CONST_INT))
		{
		  rtx tem = gen_reg_rtx (v->mode);
		  record_base_value (REGNO (tem), v->add_val, 0);
		  loop_iv_add_mult_hoist (loop,
					  extend_value_for_giv (v, bl->initial_value),
					  v->mult_val, v->add_val, tem);
		  value = tem;
		}

	      splittable_regs[reg_or_subregno (v->new_reg)] = value;
	    }
	  else
	    continue;
	}
      else
	{
#if 0
	  /* Currently, unreduced giv's can't be split.  This is not too much
	     of a problem since unreduced giv's are not live across loop
	     iterations anyways.  When unrolling a loop completely though,
	     it makes sense to reduce&split givs when possible, as this will
	     result in simpler instructions, and will not require that a reg
	     be live across loop iterations.  */

	  splittable_regs[REGNO (v->dest_reg)] = value;
	  fprintf (stderr, "Giv %d at insn %d not reduced\n",
		   REGNO (v->dest_reg), INSN_UID (v->insn));
#else
	  continue;
#endif
	}

      /* Unreduced givs are only updated once by definition.  Reduced givs
	 are updated as many times as their biv is.  Mark it so if this is
	 a splittable register.  Don't need to do anything for address givs
	 where this may not be a register.  */

      if (GET_CODE (v->new_reg) == REG)
	{
	  int count = 1;
	  if (! v->ignore)
	    count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count;

	  splittable_regs_updates[reg_or_subregno (v->new_reg)] = count;
	}

      result++;

      if (loop_dump_stream)
	{
	  int regnum;

	  /* A DEST_ADDR giv's dest_reg may be a constant or a MEM-like
	     expression rather than a plain REG; pick a regno for the dump
	     accordingly.  */
	  if (GET_CODE (v->dest_reg) == CONST_INT)
	    regnum = -1;
	  else if (GET_CODE (v->dest_reg) != REG)
	    regnum = REGNO (XEXP (v->dest_reg, 0));
	  else
	    regnum = REGNO (v->dest_reg);
	  fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
		   regnum, INSN_UID (v->insn));
	}
    }

  return result;
}
2918
/* Try to prove that the register is dead after the loop exits.  Trace every
   loop exit looking for an insn that will always be executed, which sets
   the register to some value, and appears before the first use of the register
   is found.  If successful, then return 1, otherwise return 0.

   LOOP is the loop whose exits are scanned; REG is the register (or other
   rtx compared with rtx_equal_p) being tested for deadness.  */

/* ?? Could be made more intelligent in the handling of jumps, so that
   it can search past if statements and other similar structures.  */

static int
reg_dead_after_loop (loop, reg)
     const struct loop *loop;
     rtx reg;
{
  rtx insn, label;
  enum rtx_code code;
  int jump_count = 0;
  int label_count = 0;

  /* In addition to checking all exits of this loop, we must also check
     all exits of inner nested loops that would exit this loop.  We don't
     have any way to identify those, so we just give up if there are any
     such inner loop exits.  */

  for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
    label_count++;

  if (label_count != loop->exit_count)
    return 0;

  /* HACK: Must also search the loop fall through exit, create a label_ref
     here which points to the loop->end, and append the loop_number_exit_labels
     list to it.  */
  label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
  LABEL_NEXTREF (label) = loop->exit_labels;

  for (; label; label = LABEL_NEXTREF (label))
    {
      /* Succeed if find an insn which sets the biv or if reach end of
	 function.  Fail if find an insn that uses the biv, or if come to
	 a conditional jump.  */

      insn = NEXT_INSN (XEXP (label, 0));
      while (insn)
	{
	  code = GET_CODE (insn);
	  /* 'i' means a real insn (INSN, JUMP_INSN or CALL_INSN).  */
	  if (GET_RTX_CLASS (code) == 'i')
	    {
	      rtx set, note;

	      /* Any use of REG before it is set means it may be live.  */
	      if (reg_referenced_p (reg, PATTERN (insn)))
		return 0;

	      /* A REG_EQUAL/REG_EQUIV note mentioning REG also counts
		 as a use.  */
	      note = find_reg_equal_equiv_note (insn);
	      if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
		return 0;

	      /* A full set of REG kills it along this exit path; move on
		 to the next exit label.  */
	      set = single_set (insn);
	      if (set && rtx_equal_p (SET_DEST (set), reg))
		break;
	    }

	  if (code == JUMP_INSN)
	    {
	      if (GET_CODE (PATTERN (insn)) == RETURN)
		break;
	      else if (!any_uncondjump_p (insn)
		       /* Prevent infinite loop following infinite loops.  */
		       || jump_count++ > 20)
		return 0;
	      else
		/* Follow an unconditional jump to its target and keep
		   scanning from there.  */
		insn = JUMP_LABEL (insn);
	    }

	  insn = NEXT_INSN (insn);
	}
    }

  /* Success, the register is dead on all loop exits.  */
  return 1;
}
2999
/* Try to calculate the final value of the biv, the value it will have at
   the end of the loop.  If we can do it, return that value.

   LOOP is the loop being processed and BL the class of the biv.  Returns
   the final value as an rtx (a new pseudo set by insns emitted after the
   loop when it must be computed), const0_rtx when any value is acceptable
   (reversed biv, or biv dead on all exits), or 0 on failure.  */

rtx
final_biv_value (loop, bl)
     const struct loop *loop;
     struct iv_class *bl;
{
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
  rtx increment, tem;

  /* ??? This only works for MODE_INT biv's.  Reject all others for now.  */

  if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
    return 0;

  /* The final value for reversed bivs must be calculated differently than
     for ordinary bivs.  In this case, there is already an insn after the
     loop which sets this biv's final value (if necessary), and there are
     no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, reversed biv.\n", bl->regno);

      return const0_rtx;
    }

  /* Try to calculate the final value as initial value + (number of iterations
     * increment).  For this to work, increment must be invariant, the only
     exit from the loop must be the fall through at the bottom (otherwise
     it may not have its final value when the loop exits), and the initial
     value of the biv must be invariant.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && loop_invariant_p (loop, bl->initial_value))
    {
      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment))
	{
	  /* Can calculate the loop exit value, emit insns after loop
	     end to calculate this value into a temporary register in
	     case it is needed later.  */

	  tem = gen_reg_rtx (bl->biv->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  /* tem = increment * n_iterations + initial_value, emitted
	     after the loop.  */
	  loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
				 bl->initial_value, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final biv value for %d, calculated.\n", bl->regno);

	  return tem;
	}
    }

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, bl->biv->src_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final biv value for %d, biv dead after loop exit.\n",
		 bl->regno);

      return const0_rtx;
    }

  return 0;
}
3073
/* Try to calculate the final value of the giv, the value it will have at
   the end of the loop.  If we can do it, return that value.

   LOOP is the loop being processed and V the giv.  Returns the final
   value as an rtx (a new pseudo computed by insns emitted after the loop
   when necessary), const0_rtx when any value is acceptable (giv depends
   on a reversed biv, or giv dead on all exits), or 0 on failure.
   Aborts if called for a replaceable giv.  */

rtx
final_giv_value (loop, v)
     const struct loop *loop;
     struct induction *v;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  rtx insn;
  rtx increment, tem;
  rtx seq;
  rtx loop_end = loop->end;
  unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;

  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));

  /* The final value for givs which depend on reversed bivs must be calculated
     differently than for ordinary givs.  In this case, there is already an
     insn after the loop which sets this giv's final value (if necessary),
     and there are no other loop exits, so we can return any value.  */
  if (bl->reversed)
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, depends on reversed biv\n",
		 REGNO (v->dest_reg));
      return const0_rtx;
    }

  /* Try to calculate the final value as a function of the biv it depends
     upon.  The only exit from the loop must be the fall through at the bottom
     and the insn that sets the giv must be executed on every iteration
     (otherwise the giv may not have its final value when the loop exits).  */

  /* ??? Can calculate the final giv value by subtracting off the
     extra biv increments times the giv's mult_val.  The loop must have
     only one exit for this to work, but the loop iterations does not need
     to be known.  */

  if (n_iterations != 0
      && ! loop->exit_count
      && v->always_executed)
    {
      /* ?? It is tempting to use the biv's value here since these insns will
	 be put after the loop, and hence the biv will have its final value
	 then.  However, this fails if the biv is subsequently eliminated.
	 Perhaps determine whether biv's are eliminable before trying to
	 determine whether giv's are replaceable so that we can use the
	 biv value here if it is not eliminable.  */

      /* We are emitting code after the end of the loop, so we must make
	 sure that bl->initial_value is still valid then.  It will still
	 be valid if it is invariant.  */

      increment = biv_total_increment (bl);

      if (increment && loop_invariant_p (loop, increment)
	  && loop_invariant_p (loop, bl->initial_value))
	{
	  /* Can calculate the loop exit value of its biv as
	     (n_iterations * increment) + initial_value */

	  /* The loop exit value of the giv is then
	     (final_biv_value - extra increments) * mult_val + add_val.
	     The extra increments are any increments to the biv which
	     occur in the loop after the giv's value is calculated.
	     We must search from the insn that sets the giv to the end
	     of the loop to calculate this value.  */

	  /* Put the final biv value in tem.  */
	  tem = gen_reg_rtx (v->mode);
	  record_base_value (REGNO (tem), bl->biv->add_val, 0);
	  loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
				 GEN_INT (n_iterations),
				 extend_value_for_giv (v, bl->initial_value),
				 tem);

	  /* Subtract off extra increments as we find them.  */
	  for (insn = NEXT_INSN (v->insn); insn != loop_end;
	       insn = NEXT_INSN (insn))
	    {
	      struct induction *biv;

	      for (biv = bl->biv; biv; biv = biv->next_iv)
		if (biv->insn == insn)
		  {
		    /* Emit tem = tem - add_val after the loop.  */
		    start_sequence ();
		    tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
					       biv->add_val, NULL_RTX, 0,
					       OPTAB_LIB_WIDEN);
		    seq = get_insns ();
		    end_sequence ();
		    loop_insn_sink (loop, seq);
		  }
	    }

	  /* Now calculate the giv's final value.  */
	  loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Final giv value for %d, calc from biv's value.\n",
		     REGNO (v->dest_reg));

	  return tem;
	}
    }

  /* Replaceable giv's should never reach here.  */
  if (v->replaceable)
    abort ();

  /* Check to see if the biv is dead at all loop exits.  */
  if (reg_dead_after_loop (loop, v->dest_reg))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream,
		 "Final giv value for %d, giv dead after loop exit.\n",
		 REGNO (v->dest_reg));

      return const0_rtx;
    }

  return 0;
}
3201
3202 /* Look back before LOOP->START for the insn that sets REG and return
3203 the equivalent constant if there is a REG_EQUAL note otherwise just
3204 the SET_SRC of REG. */
3205
3206 static rtx
loop_find_equiv_value(loop,reg)3207 loop_find_equiv_value (loop, reg)
3208 const struct loop *loop;
3209 rtx reg;
3210 {
3211 rtx loop_start = loop->start;
3212 rtx insn, set;
3213 rtx ret;
3214
3215 ret = reg;
3216 for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
3217 {
3218 if (GET_CODE (insn) == CODE_LABEL)
3219 break;
3220
3221 else if (INSN_P (insn) && reg_set_p (reg, insn))
3222 {
3223 /* We found the last insn before the loop that sets the register.
3224 If it sets the entire register, and has a REG_EQUAL note,
3225 then use the value of the REG_EQUAL note. */
3226 if ((set = single_set (insn))
3227 && (SET_DEST (set) == reg))
3228 {
3229 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3230
3231 /* Only use the REG_EQUAL note if it is a constant.
3232 Other things, divide in particular, will cause
3233 problems later if we use them. */
3234 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3235 && CONSTANT_P (XEXP (note, 0)))
3236 ret = XEXP (note, 0);
3237 else
3238 ret = SET_SRC (set);
3239
3240 /* We cannot do this if it changes between the
3241 assignment and loop start though. */
3242 if (modified_between_p (ret, insn, loop_start))
3243 ret = reg;
3244 }
3245 break;
3246 }
3247 }
3248 return ret;
3249 }
3250
3251 /* Return a simplified rtx for the expression OP - REG.
3252
3253 REG must appear in OP, and OP must be a register or the sum of a register
3254 and a second term.
3255
3256 Thus, the return value must be const0_rtx or the second term.
3257
3258 The caller is responsible for verifying that REG appears in OP and OP has
3259 the proper form. */
3260
3261 static rtx
subtract_reg_term(op,reg)3262 subtract_reg_term (op, reg)
3263 rtx op, reg;
3264 {
3265 if (op == reg)
3266 return const0_rtx;
3267 if (GET_CODE (op) == PLUS)
3268 {
3269 if (XEXP (op, 0) == reg)
3270 return XEXP (op, 1);
3271 else if (XEXP (op, 1) == reg)
3272 return XEXP (op, 0);
3273 }
3274 /* OP does not contain REG as a term. */
3275 abort ();
3276 }
3277
3278 /* Find and return register term common to both expressions OP0 and
3279 OP1 or NULL_RTX if no such term exists. Each expression must be a
3280 REG or a PLUS of a REG. */
3281
3282 static rtx
find_common_reg_term(op0,op1)3283 find_common_reg_term (op0, op1)
3284 rtx op0, op1;
3285 {
3286 if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS)
3287 && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS))
3288 {
3289 rtx op00;
3290 rtx op01;
3291 rtx op10;
3292 rtx op11;
3293
3294 if (GET_CODE (op0) == PLUS)
3295 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
3296 else
3297 op01 = const0_rtx, op00 = op0;
3298
3299 if (GET_CODE (op1) == PLUS)
3300 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
3301 else
3302 op11 = const0_rtx, op10 = op1;
3303
3304 /* Find and return common register term if present. */
3305 if (REG_P (op00) && (op00 == op10 || op00 == op11))
3306 return op00;
3307 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
3308 return op01;
3309 }
3310
3311 /* No common register term found. */
3312 return NULL_RTX;
3313 }
3314
3315 /* Determine the loop iterator and calculate the number of loop
3316 iterations. Returns the exact number of loop iterations if it can
3317 be calculated, otherwise returns zero. */
3318
3319 unsigned HOST_WIDE_INT
loop_iterations(loop)3320 loop_iterations (loop)
3321 struct loop *loop;
3322 {
3323 struct loop_info *loop_info = LOOP_INFO (loop);
3324 struct loop_ivs *ivs = LOOP_IVS (loop);
3325 rtx comparison, comparison_value;
3326 rtx iteration_var, initial_value, increment, final_value;
3327 enum rtx_code comparison_code;
3328 HOST_WIDE_INT inc;
3329 unsigned HOST_WIDE_INT abs_inc;
3330 unsigned HOST_WIDE_INT abs_diff;
3331 int off_by_one;
3332 int increment_dir;
3333 int unsigned_p, compare_dir, final_larger;
3334 rtx last_loop_insn;
3335 rtx reg_term;
3336 struct iv_class *bl;
3337
3338 loop_info->n_iterations = 0;
3339 loop_info->initial_value = 0;
3340 loop_info->initial_equiv_value = 0;
3341 loop_info->comparison_value = 0;
3342 loop_info->final_value = 0;
3343 loop_info->final_equiv_value = 0;
3344 loop_info->increment = 0;
3345 loop_info->iteration_var = 0;
3346 loop_info->unroll_number = 1;
3347 loop_info->iv = 0;
3348
3349 /* We used to use prev_nonnote_insn here, but that fails because it might
3350 accidentally get the branch for a contained loop if the branch for this
3351 loop was deleted. We can only trust branches immediately before the
3352 loop_end. */
3353 last_loop_insn = PREV_INSN (loop->end);
3354
3355 /* ??? We should probably try harder to find the jump insn
3356 at the end of the loop. The following code assumes that
3357 the last loop insn is a jump to the top of the loop. */
3358 if (GET_CODE (last_loop_insn) != JUMP_INSN)
3359 {
3360 if (loop_dump_stream)
3361 fprintf (loop_dump_stream,
3362 "Loop iterations: No final conditional branch found.\n");
3363 return 0;
3364 }
3365
3366 /* If there is a more than a single jump to the top of the loop
3367 we cannot (easily) determine the iteration count. */
3368 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
3369 {
3370 if (loop_dump_stream)
3371 fprintf (loop_dump_stream,
3372 "Loop iterations: Loop has multiple back edges.\n");
3373 return 0;
3374 }
3375
3376 /* If there are multiple conditionalized loop exit tests, they may jump
3377 back to differing CODE_LABELs. */
3378 if (loop->top && loop->cont)
3379 {
3380 rtx temp = PREV_INSN (last_loop_insn);
3381
3382 do
3383 {
3384 if (GET_CODE (temp) == JUMP_INSN)
3385 {
3386 /* There are some kinds of jumps we can't deal with easily. */
3387 if (JUMP_LABEL (temp) == 0)
3388 {
3389 if (loop_dump_stream)
3390 fprintf
3391 (loop_dump_stream,
3392 "Loop iterations: Jump insn has null JUMP_LABEL.\n");
3393 return 0;
3394 }
3395
3396 if (/* Previous unrolling may have generated new insns not
3397 covered by the uid_luid array. */
3398 INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop
3399 /* Check if we jump back into the loop body. */
3400 && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top)
3401 && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont))
3402 {
3403 if (loop_dump_stream)
3404 fprintf
3405 (loop_dump_stream,
3406 "Loop iterations: Loop has multiple back edges.\n");
3407 return 0;
3408 }
3409 }
3410 }
3411 while ((temp = PREV_INSN (temp)) != loop->cont);
3412 }
3413
3414 /* Find the iteration variable. If the last insn is a conditional
3415 branch, and the insn before tests a register value, make that the
3416 iteration variable. */
3417
3418 comparison = get_condition_for_loop (loop, last_loop_insn);
3419 if (comparison == 0)
3420 {
3421 if (loop_dump_stream)
3422 fprintf (loop_dump_stream,
3423 "Loop iterations: No final comparison found.\n");
3424 return 0;
3425 }
3426
3427 /* ??? Get_condition may switch position of induction variable and
3428 invariant register when it canonicalizes the comparison. */
3429
3430 comparison_code = GET_CODE (comparison);
3431 iteration_var = XEXP (comparison, 0);
3432 comparison_value = XEXP (comparison, 1);
3433
3434 if (GET_CODE (iteration_var) != REG)
3435 {
3436 if (loop_dump_stream)
3437 fprintf (loop_dump_stream,
3438 "Loop iterations: Comparison not against register.\n");
3439 return 0;
3440 }
3441
3442 /* The only new registers that are created before loop iterations
3443 are givs made from biv increments or registers created by
3444 load_mems. In the latter case, it is possible that try_copy_prop
3445 will propagate a new pseudo into the old iteration register but
3446 this will be marked by having the REG_USERVAR_P bit set. */
3447
3448 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs
3449 && ! REG_USERVAR_P (iteration_var))
3450 abort ();
3451
3452 /* Determine the initial value of the iteration variable, and the amount
3453 that it is incremented each loop. Use the tables constructed by
3454 the strength reduction pass to calculate these values. */
3455
3456 /* Clear the result values, in case no answer can be found. */
3457 initial_value = 0;
3458 increment = 0;
3459
3460 /* The iteration variable can be either a giv or a biv. Check to see
3461 which it is, and compute the variable's initial value, and increment
3462 value if possible. */
3463
3464 /* If this is a new register, can't handle it since we don't have any
3465 reg_iv_type entry for it. */
3466 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
3467 {
3468 if (loop_dump_stream)
3469 fprintf (loop_dump_stream,
3470 "Loop iterations: No reg_iv_type entry for iteration var.\n");
3471 return 0;
3472 }
3473
3474 /* Reject iteration variables larger than the host wide int size, since they
3475 could result in a number of iterations greater than the range of our
3476 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
3477 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
3478 > HOST_BITS_PER_WIDE_INT))
3479 {
3480 if (loop_dump_stream)
3481 fprintf (loop_dump_stream,
3482 "Loop iterations: Iteration var rejected because mode too large.\n");
3483 return 0;
3484 }
3485 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
3486 {
3487 if (loop_dump_stream)
3488 fprintf (loop_dump_stream,
3489 "Loop iterations: Iteration var not an integer.\n");
3490 return 0;
3491 }
3492 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
3493 {
3494 if (REGNO (iteration_var) >= ivs->n_regs)
3495 abort ();
3496
3497 /* Grab initial value, only useful if it is a constant. */
3498 bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
3499 initial_value = bl->initial_value;
3500 if (!bl->biv->always_executed || bl->biv->maybe_multiple)
3501 {
3502 if (loop_dump_stream)
3503 fprintf (loop_dump_stream,
3504 "Loop iterations: Basic induction var not set once in each iteration.\n");
3505 return 0;
3506 }
3507
3508 increment = biv_total_increment (bl);
3509 }
3510 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
3511 {
3512 HOST_WIDE_INT offset = 0;
3513 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
3514 rtx biv_initial_value;
3515
3516 if (REGNO (v->src_reg) >= ivs->n_regs)
3517 abort ();
3518
3519 if (!v->always_executed || v->maybe_multiple)
3520 {
3521 if (loop_dump_stream)
3522 fprintf (loop_dump_stream,
3523 "Loop iterations: General induction var not set once in each iteration.\n");
3524 return 0;
3525 }
3526
3527 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
3528
3529 /* Increment value is mult_val times the increment value of the biv. */
3530
3531 increment = biv_total_increment (bl);
3532 if (increment)
3533 {
3534 struct induction *biv_inc;
3535
3536 increment = fold_rtx_mult_add (v->mult_val,
3537 extend_value_for_giv (v, increment),
3538 const0_rtx, v->mode);
3539 /* The caller assumes that one full increment has occurred at the
3540 first loop test. But that's not true when the biv is incremented
3541 after the giv is set (which is the usual case), e.g.:
3542 i = 6; do {;} while (i++ < 9) .
3543 Therefore, we bias the initial value by subtracting the amount of
3544 the increment that occurs between the giv set and the giv test. */
3545 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
3546 {
3547 if (loop_insn_first_p (v->insn, biv_inc->insn))
3548 {
3549 if (REG_P (biv_inc->add_val))
3550 {
3551 if (loop_dump_stream)
3552 fprintf (loop_dump_stream,
3553 "Loop iterations: Basic induction var add_val is REG %d.\n",
3554 REGNO (biv_inc->add_val));
3555 return 0;
3556 }
3557
3558 /* If we have already counted it, skip it. */
3559 if (biv_inc->same)
3560 continue;
3561
3562 offset -= INTVAL (biv_inc->add_val);
3563 }
3564 }
3565 }
3566 if (loop_dump_stream)
3567 fprintf (loop_dump_stream,
3568 "Loop iterations: Giv iterator, initial value bias %ld.\n",
3569 (long) offset);
3570
3571 /* Initial value is mult_val times the biv's initial value plus
3572 add_val. Only useful if it is a constant. */
3573 biv_initial_value = extend_value_for_giv (v, bl->initial_value);
3574 initial_value
3575 = fold_rtx_mult_add (v->mult_val,
3576 plus_constant (biv_initial_value, offset),
3577 v->add_val, v->mode);
3578 }
3579 else
3580 {
3581 if (loop_dump_stream)
3582 fprintf (loop_dump_stream,
3583 "Loop iterations: Not basic or general induction var.\n");
3584 return 0;
3585 }
3586
3587 if (initial_value == 0)
3588 return 0;
3589
3590 unsigned_p = 0;
3591 off_by_one = 0;
3592 switch (comparison_code)
3593 {
3594 case LEU:
3595 unsigned_p = 1;
3596 case LE:
3597 compare_dir = 1;
3598 off_by_one = 1;
3599 break;
3600 case GEU:
3601 unsigned_p = 1;
3602 case GE:
3603 compare_dir = -1;
3604 off_by_one = -1;
3605 break;
3606 case EQ:
3607 /* Cannot determine loop iterations with this case. */
3608 compare_dir = 0;
3609 break;
3610 case LTU:
3611 unsigned_p = 1;
3612 case LT:
3613 compare_dir = 1;
3614 break;
3615 case GTU:
3616 unsigned_p = 1;
3617 case GT:
3618 compare_dir = -1;
3619 case NE:
3620 compare_dir = 0;
3621 break;
3622 default:
3623 abort ();
3624 }
3625
3626 /* If the comparison value is an invariant register, then try to find
3627 its value from the insns before the start of the loop. */
3628
3629 final_value = comparison_value;
3630 if (GET_CODE (comparison_value) == REG
3631 && loop_invariant_p (loop, comparison_value))
3632 {
3633 final_value = loop_find_equiv_value (loop, comparison_value);
3634
3635 /* If we don't get an invariant final value, we are better
3636 off with the original register. */
3637 if (! loop_invariant_p (loop, final_value))
3638 final_value = comparison_value;
3639 }
3640
3641 /* Calculate the approximate final value of the induction variable
3642 (on the last successful iteration). The exact final value
3643 depends on the branch operator, and increment sign. It will be
3644 wrong if the iteration variable is not incremented by one each
3645 time through the loop and (comparison_value + off_by_one -
3646 initial_value) % increment != 0.
3647 ??? Note that the final_value may overflow and thus final_larger
3648 will be bogus. A potentially infinite loop will be classified
3649 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
3650 if (off_by_one)
3651 final_value = plus_constant (final_value, off_by_one);
3652
3653 /* Save the calculated values describing this loop's bounds, in case
3654 precondition_loop_p will need them later. These values can not be
3655 recalculated inside precondition_loop_p because strength reduction
3656 optimizations may obscure the loop's structure.
3657
3658 These values are only required by precondition_loop_p and insert_bct
3659 whenever the number of iterations cannot be computed at compile time.
3660 Only the difference between final_value and initial_value is
3661 important. Note that final_value is only approximate. */
3662 loop_info->initial_value = initial_value;
3663 loop_info->comparison_value = comparison_value;
3664 loop_info->final_value = plus_constant (comparison_value, off_by_one);
3665 loop_info->increment = increment;
3666 loop_info->iteration_var = iteration_var;
3667 loop_info->comparison_code = comparison_code;
3668 loop_info->iv = bl;
3669
3670 /* Try to determine the iteration count for loops such
3671 as (for i = init; i < init + const; i++). When running the
3672 loop optimization twice, the first pass often converts simple
3673 loops into this form. */
3674
3675 if (REG_P (initial_value))
3676 {
3677 rtx reg1;
3678 rtx reg2;
3679 rtx const2;
3680
3681 reg1 = initial_value;
3682 if (GET_CODE (final_value) == PLUS)
3683 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
3684 else
3685 reg2 = final_value, const2 = const0_rtx;
3686
3687 /* Check for initial_value = reg1, final_value = reg2 + const2,
3688 where reg1 != reg2. */
3689 if (REG_P (reg2) && reg2 != reg1)
3690 {
3691 rtx temp;
3692
3693 /* Find what reg1 is equivalent to. Hopefully it will
3694 either be reg2 or reg2 plus a constant. */
3695 temp = loop_find_equiv_value (loop, reg1);
3696
3697 if (find_common_reg_term (temp, reg2))
3698 initial_value = temp;
3699 else if (loop_invariant_p (loop, reg2))
3700 {
3701 /* Find what reg2 is equivalent to. Hopefully it will
3702 either be reg1 or reg1 plus a constant. Let's ignore
3703 the latter case for now since it is not so common. */
3704 temp = loop_find_equiv_value (loop, reg2);
3705
3706 if (temp == loop_info->iteration_var)
3707 temp = initial_value;
3708 if (temp == reg1)
3709 final_value = (const2 == const0_rtx)
3710 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
3711 }
3712 }
3713 else if (loop->vtop && GET_CODE (reg2) == CONST_INT)
3714 {
3715 rtx temp;
3716
3717 /* When running the loop optimizer twice, check_dbra_loop
3718 further obfuscates reversible loops of the form:
3719 for (i = init; i < init + const; i++). We often end up with
3720 final_value = 0, initial_value = temp, temp = temp2 - init,
3721 where temp2 = init + const. If the loop has a vtop we
3722 can replace initial_value with const. */
3723
3724 temp = loop_find_equiv_value (loop, reg1);
3725
3726 if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0)))
3727 {
3728 rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0));
3729
3730 if (GET_CODE (temp2) == PLUS
3731 && XEXP (temp2, 0) == XEXP (temp, 1))
3732 initial_value = XEXP (temp2, 1);
3733 }
3734 }
3735 }
3736
3737 /* If have initial_value = reg + const1 and final_value = reg +
3738 const2, then replace initial_value with const1 and final_value
3739 with const2. This should be safe since we are protected by the
3740 initial comparison before entering the loop if we have a vtop.
3741 For example, a + b < a + c is not equivalent to b < c for all a
3742 when using modulo arithmetic.
3743
3744 ??? Without a vtop we could still perform the optimization if we check
3745 the initial and final values carefully. */
3746 if (loop->vtop
3747 && (reg_term = find_common_reg_term (initial_value, final_value)))
3748 {
3749 initial_value = subtract_reg_term (initial_value, reg_term);
3750 final_value = subtract_reg_term (final_value, reg_term);
3751 }
3752
3753 loop_info->initial_equiv_value = initial_value;
3754 loop_info->final_equiv_value = final_value;
3755
3756 /* For EQ comparison loops, we don't have a valid final value.
3757 Check this now so that we won't leave an invalid value if we
3758 return early for any other reason. */
3759 if (comparison_code == EQ)
3760 loop_info->final_equiv_value = loop_info->final_value = 0;
3761
3762 if (increment == 0)
3763 {
3764 if (loop_dump_stream)
3765 fprintf (loop_dump_stream,
3766 "Loop iterations: Increment value can't be calculated.\n");
3767 return 0;
3768 }
3769
3770 if (GET_CODE (increment) != CONST_INT)
3771 {
3772 /* If we have a REG, check to see if REG holds a constant value. */
3773 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
3774 clear if it is worthwhile to try to handle such RTL. */
3775 if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG)
3776 increment = loop_find_equiv_value (loop, increment);
3777
3778 if (GET_CODE (increment) != CONST_INT)
3779 {
3780 if (loop_dump_stream)
3781 {
3782 fprintf (loop_dump_stream,
3783 "Loop iterations: Increment value not constant ");
3784 print_simple_rtl (loop_dump_stream, increment);
3785 fprintf (loop_dump_stream, ".\n");
3786 }
3787 return 0;
3788 }
3789 loop_info->increment = increment;
3790 }
3791
3792 if (GET_CODE (initial_value) != CONST_INT)
3793 {
3794 if (loop_dump_stream)
3795 {
3796 fprintf (loop_dump_stream,
3797 "Loop iterations: Initial value not constant ");
3798 print_simple_rtl (loop_dump_stream, initial_value);
3799 fprintf (loop_dump_stream, ".\n");
3800 }
3801 return 0;
3802 }
3803 else if (GET_CODE (final_value) != CONST_INT)
3804 {
3805 if (loop_dump_stream)
3806 {
3807 fprintf (loop_dump_stream,
3808 "Loop iterations: Final value not constant ");
3809 print_simple_rtl (loop_dump_stream, final_value);
3810 fprintf (loop_dump_stream, ".\n");
3811 }
3812 return 0;
3813 }
3814 else if (comparison_code == EQ)
3815 {
3816 rtx inc_once;
3817
3818 if (loop_dump_stream)
3819 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");
3820
3821 inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment),
3822 GET_MODE (iteration_var));
3823
3824 if (inc_once == final_value)
3825 {
3826 /* The iterator value once through the loop is equal to the
3827 comparision value. Either we have an infinite loop, or
3828 we'll loop twice. */
3829 if (increment == const0_rtx)
3830 return 0;
3831 loop_info->n_iterations = 2;
3832 }
3833 else
3834 loop_info->n_iterations = 1;
3835
3836 if (GET_CODE (loop_info->initial_value) == CONST_INT)
3837 loop_info->final_value
3838 = gen_int_mode ((INTVAL (loop_info->initial_value)
3839 + loop_info->n_iterations * INTVAL (increment)),
3840 GET_MODE (iteration_var));
3841 else
3842 loop_info->final_value
3843 = plus_constant (loop_info->initial_value,
3844 loop_info->n_iterations * INTVAL (increment));
3845 loop_info->final_equiv_value
3846 = gen_int_mode ((INTVAL (initial_value)
3847 + loop_info->n_iterations * INTVAL (increment)),
3848 GET_MODE (iteration_var));
3849 return loop_info->n_iterations;
3850 }
3851
3852 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
3853 if (unsigned_p)
3854 final_larger
3855 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3856 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3857 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3858 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3859 else
3860 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3861 - (INTVAL (final_value) < INTVAL (initial_value));
3862
3863 if (INTVAL (increment) > 0)
3864 increment_dir = 1;
3865 else if (INTVAL (increment) == 0)
3866 increment_dir = 0;
3867 else
3868 increment_dir = -1;
3869
3870 /* There are 27 different cases: compare_dir = -1, 0, 1;
3871 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3872 There are 4 normal cases, 4 reverse cases (where the iteration variable
3873 will overflow before the loop exits), 4 infinite loop cases, and 15
3874 immediate exit (0 or 1 iteration depending on loop type) cases.
3875 Only try to optimize the normal cases. */
3876
3877 /* (compare_dir/final_larger/increment_dir)
3878 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3879 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3880 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3881 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3882
3883 /* ?? If the meaning of reverse loops (where the iteration variable
3884 will overflow before the loop exits) is undefined, then could
3885 eliminate all of these special checks, and just always assume
3886 the loops are normal/immediate/infinite. Note that this means
3887 the sign of increment_dir does not have to be known. Also,
3888 since it does not really hurt if immediate exit loops or infinite loops
3889 are optimized, then that case could be ignored also, and hence all
3890 loops can be optimized.
3891
3892 According to ANSI Spec, the reverse loop case result is undefined,
3893 because the action on overflow is undefined.
3894
3895 See also the special test for NE loops below. */
3896
3897 if (final_larger == increment_dir && final_larger != 0
3898 && (final_larger == compare_dir || compare_dir == 0))
3899 /* Normal case. */
3900 ;
3901 else
3902 {
3903 if (loop_dump_stream)
3904 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
3905 return 0;
3906 }
3907
3908 /* Calculate the number of iterations, final_value is only an approximation,
3909 so correct for that. Note that abs_diff and n_iterations are
3910 unsigned, because they can be as large as 2^n - 1. */
3911
3912 inc = INTVAL (increment);
3913 if (inc > 0)
3914 {
3915 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
3916 abs_inc = inc;
3917 }
3918 else if (inc < 0)
3919 {
3920 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
3921 abs_inc = -inc;
3922 }
3923 else
3924 abort ();
3925
3926 /* Given that iteration_var is going to iterate over its own mode,
3927 not HOST_WIDE_INT, disregard higher bits that might have come
3928 into the picture due to sign extension of initial and final
3929 values. */
3930 abs_diff &= ((unsigned HOST_WIDE_INT) 1
3931 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
3932 << 1) - 1;
3933
3934 /* For NE tests, make sure that the iteration variable won't miss
3935 the final value. If abs_diff mod abs_incr is not zero, then the
3936 iteration variable will overflow before the loop exits, and we
3937 can not calculate the number of iterations. */
3938 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
3939 return 0;
3940
3941 /* Note that the number of iterations could be calculated using
3942 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
3943 handle potential overflow of the summation. */
3944 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
3945 return loop_info->n_iterations;
3946 }
3947
3948 /* Replace uses of split bivs with their split pseudo register. This is
3949 for original instructions which remain after loop unrolling without
3950 copying. */
3951
3952 static rtx
remap_split_bivs(loop,x)3953 remap_split_bivs (loop, x)
3954 struct loop *loop;
3955 rtx x;
3956 {
3957 struct loop_ivs *ivs = LOOP_IVS (loop);
3958 enum rtx_code code;
3959 int i;
3960 const char *fmt;
3961
3962 if (x == 0)
3963 return x;
3964
3965 code = GET_CODE (x);
3966 switch (code)
3967 {
3968 case SCRATCH:
3969 case PC:
3970 case CC0:
3971 case CONST_INT:
3972 case CONST_DOUBLE:
3973 case CONST:
3974 case SYMBOL_REF:
3975 case LABEL_REF:
3976 return x;
3977
3978 case REG:
3979 #if 0
3980 /* If non-reduced/final-value givs were split, then this would also
3981 have to remap those givs also. */
3982 #endif
3983 if (REGNO (x) < ivs->n_regs
3984 && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT)
3985 return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg;
3986 break;
3987
3988 default:
3989 break;
3990 }
3991
3992 fmt = GET_RTX_FORMAT (code);
3993 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3994 {
3995 if (fmt[i] == 'e')
3996 XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i));
3997 else if (fmt[i] == 'E')
3998 {
3999 int j;
4000 for (j = 0; j < XVECLEN (x, i); j++)
4001 XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j));
4002 }
4003 }
4004 return x;
4005 }
4006
4007 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
4008 FIST_UID is always executed if LAST_UID is), then return 1. Otherwise
4009 return 0. COPY_START is where we can start looking for the insns
4010 FIRST_UID and LAST_UID. COPY_END is where we stop looking for these
4011 insns.
4012
4013 If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
4014 must dominate LAST_UID.
4015
4016 If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4017 may not dominate LAST_UID.
4018
4019 If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4020 must dominate LAST_UID. */
4021
4022 int
set_dominates_use(regno,first_uid,last_uid,copy_start,copy_end)4023 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
4024 int regno;
4025 int first_uid;
4026 int last_uid;
4027 rtx copy_start;
4028 rtx copy_end;
4029 {
4030 int passed_jump = 0;
4031 rtx p = NEXT_INSN (copy_start);
4032
4033 while (INSN_UID (p) != first_uid)
4034 {
4035 if (GET_CODE (p) == JUMP_INSN)
4036 passed_jump = 1;
4037 /* Could not find FIRST_UID. */
4038 if (p == copy_end)
4039 return 0;
4040 p = NEXT_INSN (p);
4041 }
4042
4043 /* Verify that FIRST_UID is an insn that entirely sets REGNO. */
4044 if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno))
4045 return 0;
4046
4047 /* FIRST_UID is always executed. */
4048 if (passed_jump == 0)
4049 return 1;
4050
4051 while (INSN_UID (p) != last_uid)
4052 {
4053 /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
4054 can not be sure that FIRST_UID dominates LAST_UID. */
4055 if (GET_CODE (p) == CODE_LABEL)
4056 return 0;
4057 /* Could not find LAST_UID, but we reached the end of the loop, so
4058 it must be safe. */
4059 else if (p == copy_end)
4060 return 1;
4061 p = NEXT_INSN (p);
4062 }
4063
4064 /* FIRST_UID is always executed if LAST_UID is executed. */
4065 return 1;
4066 }
4067
4068 /* This routine is called when the number of iterations for the unrolled
4069 loop is one. The goal is to identify a loop that begins with an
4070 unconditional branch to the loop continuation note (or a label just after).
4071 In this case, the unconditional branch that starts the loop needs to be
4072 deleted so that we execute the single iteration. */
4073
4074 static rtx
ujump_to_loop_cont(loop_start,loop_cont)4075 ujump_to_loop_cont (loop_start, loop_cont)
4076 rtx loop_start;
4077 rtx loop_cont;
4078 {
4079 rtx x, label, label_ref;
4080
4081 /* See if loop start, or the next insn is an unconditional jump. */
4082 loop_start = next_nonnote_insn (loop_start);
4083
4084 x = pc_set (loop_start);
4085 if (!x)
4086 return NULL_RTX;
4087
4088 label_ref = SET_SRC (x);
4089 if (!label_ref)
4090 return NULL_RTX;
4091
4092 /* Examine insn after loop continuation note. Return if not a label. */
4093 label = next_nonnote_insn (loop_cont);
4094 if (label == 0 || GET_CODE (label) != CODE_LABEL)
4095 return NULL_RTX;
4096
4097 /* Return the loop start if the branch label matches the code label. */
4098 if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0)))
4099 return loop_start;
4100 else
4101 return NULL_RTX;
4102 }
4103