/* Loop unrolling and peeling.
   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "params.h"
#include "output.h"
#include "expr.h"
#include "hashtab.h"
#include "recog.h"
#include "target.h"

/* This pass performs loop unrolling and peeling.  We only perform these
   optimizations on innermost loops (with a single exception), because
   the impact on performance is greatest there, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, from code that is easier for further passes to optimize, and
   in some cases from fewer tests of exit conditions.  The main problem is
   code growth, which hurts performance through cache effects.

   What we do:

   -- complete peeling of once-rolling loops; this is the above mentioned
      exception, as it cancels the loop completely and
      does not cause code growth
   -- complete peeling of loops that roll a (small) constant number of times
   -- simple peeling of the first iterations of loops that do not roll much
      (according to profile feedback)
   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of the remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For a more detailed description of each of these, see the comments at
   the appropriate function below.

   There are a number of parameters (defined and described in params.def)
   that control how much we unroll/peel.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several
   percent.  */

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx insn;		/* The insn in which the induction variable occurs.  */
  rtx base_var;		/* The variable on which the values in later
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
  unsigned n_loc;
  unsigned loc[3];	/* Location where the definition of the induction
			   variable occurs in the insn.  For example if
			   N_LOC is 2, the expression is located at
			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
};
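
/* For illustration, a minimal sketch of how LOC addresses the IV use
   (the concrete insn below is hypothetical):

     (set (reg:SI 60) (plus:SI (reg:SI 60) (const_int 4)))

   For such an insn, analyze_iv_to_split_insn below records N_LOC == 1
   and loc[0] == 1, i.e. the IV expression is the SET_SRC,
   XEXP (single_set (insn), 1).  */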

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx insn;		           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
  unsigned accum_pos;              /* The position in which the accumulator is placed in
                                      the insn src.  For example in x = x + something
                                      accum_pos is 0 while in x = something + x accum_pos
                                      is 1.  */
};
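
/* For illustration (a hypothetical source-level view): when the loop body
   contains the accumulation  sum += a[i]  and the loop is unrolled 4x,
   expansion rewrites the copies to use  sum, sum1, sum2, sum3  (the extra
   registers live in VAR_EXPANSIONS), breaking the dependence chain; the
   partial sums are combined again at the loop exit.  */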

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
                                      to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};

static void decide_unrolling_and_peeling (int);
static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
static void decide_peel_once_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void peel_loop_simple (struct loop *);
static void peel_loop_completely (struct loop *);
static void unroll_loop_stupid (struct loop *);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
void
unroll_and_peel_loops (int flags)
{
  struct loop *loop;
  bool check;
  loop_iterator li;

  /* First perform complete loop peeling (it is almost surely a win,
     and affects parameters for further decisions a lot).  */
  peel_loops_completely (flags);

  /* Now decide the rest of the unrolling and peeling.  */
  decide_unrolling_and_peeling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      check = true;
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_PEEL_COMPLETELY:
	  /* Already done.  */
	  gcc_unreachable ();
	case LPT_PEEL_SIMPLE:
	  peel_loop_simple (loop);
	  break;
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  break;
	case LPT_NONE:
	  check = false;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (check)
	{
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }

  iv_analysis_done ();
}

/* Check whether the exit of LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx insn;

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn))
	return false;
    }

  return true;
}

/* Depending on FLAGS, check whether to peel loops completely and do so.  */
static void
peel_loops_completely (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* Scan the loops, the inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file,
		 "\n;; *** Considering loop %d for complete peeling ***\n",
		 loop->num);

      loop->ninsns = num_loop_insns (loop);

      decide_peel_once_rolling (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_completely (loop, flags);

      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
	{
	  peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }
}

/* Decide whether to unroll or peel loops (depending on FLAGS) and how
   much.  */
static void
decide_unrolling_and_peeling (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_simple (loop, flags);
    }
}

/* Decide whether the LOOP is once rolling and suitable for complete
   peeling.  */
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");

  /* Is the loop small enough?  */
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite
      || !desc->const_iter
      || desc->niter != 0)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop rolls exactly once\n");
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}

/* Decide whether the LOOP is suitable for complete peeling.  */
static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling completely\n");

  /* Skip non-innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
      return;
    }

  /* Do not peel cold areas.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, cold area\n");
      return;
    }

  /* Can the loop be manipulated?  */
  if (!can_duplicate_loop_p (loop))
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Not considering loop, cannot duplicate\n");
      return;
    }

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
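  /* For illustration (the numbers are hypothetical; the actual limits come
     from params.def): with a 40-insn body and a 400-insn
     PARAM_MAX_COMPLETELY_PEELED_INSNS budget, the division above yields
     npeel == 10, which is then capped by PARAM_MAX_COMPLETELY_PEEL_TIMES.  */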

  /* Is the loop small enough?  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || !desc->const_iter
      || desc->infinite)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  if (desc->niter > npeel - 1)
    {
      if (dump_file)
	{
	  fprintf (dump_file,
		   ";; Not peeling loop completely, rolls too much (");
	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
	}
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel loop completely\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}

/* Peel all iterations of LOOP, remove exit edges and cancel the loop
   completely.  The transformation does this:

   for (i = 0; i < 4; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   body; i++;
   body; i++;
   */
static void
peel_loop_completely (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned HOST_WIDE_INT npeel;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge ein;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;

  npeel = desc->niter;

  if (npeel)
    {
      bool ok;

      wont_exit = sbitmap_alloc (npeel + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      remove_edges = NULL;

      if (flag_split_ivs_in_unroller)
        opt_info = analyze_insns_in_loop (loop);

      opt_info_start_duplication (opt_info);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  npeel,
					  wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ
					  | DLTHE_FLAG_COMPLETTE_PEEL
					  | (opt_info
					     ? DLTHE_RECORD_COPY_NUMBER : 0));
      gcc_assert (ok);

      free (wont_exit);

      if (opt_info)
	{
	  apply_opt_in_copies (opt_info, npeel, false, true);
	  free_opt_info (opt_info);
	}

      /* Remove the exit edges.  */
      FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
	remove_path (ein);
      VEC_free (edge, heap, remove_edges);
    }

  ein = desc->in_edge;
  free_simple_loop_desc (loop);

  /* Now remove the unreachable part of the last iteration and cancel
     the loop.  */
  remove_path (ein);

  if (dump_file)
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.  */
  if (desc->niter < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
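  /* For illustration (the numbers are hypothetical, and the exit is assumed
     not to be at the end of the body): with desc->niter == 100 and
     nunroll == 8, unrolling 10x (i == 9) needs 100 % 10 + 10 == 10 copies
     of the body, while unrolling 8x (i == 7) needs 100 % 8 + 8 == 12, so
     the search below settles on i == 9.  */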
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  if (dump_file)
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
	     best_unroll + 1, best_copies, nunroll);

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  sbitmap_ones (wont_exit);

  remove_edges = NULL;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test of
	 the exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  desc->niter_max -= exit_mod;
	}

      SET_BIT (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case where we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  RESET_BIT (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    RESET_BIT (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  desc->niter_max -= exit_mod + 1;
	  desc->noloop_assumptions = NULL_RTX;

	  SET_BIT (wont_exit, 0);
	  SET_BIT (wont_exit, 1);
	}

      RESET_BIT (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  desc->niter_max /= max_unroll + 1;
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
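  /* For illustration: if nunroll == 6 at this point, the loop above stops
     with i == 4, so we unroll 4x (lpt_decision.times == 3).  */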

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the runtime computable "
	     "times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the TODO_verify_flow at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do comparison
	in a mode that needs comparison by parts, we cannot analyze the
	number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
	branching code seems to be only accessed for signed division.  The
	divisions used by # of iterations analysis are always unsigned.
	Problems might arise on architectures that emit branching code
	for some operations that may appear in the unroller (especially
	for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Unroll LOOP, for which we are able to count the number of iterations at
   runtime, LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this
   (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, init_code, branch_code, tmp;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  VEC (basic_block, heap) *dom_bbs;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  VEC (edge, heap) *remove_edges;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  dom_bbs = NULL;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      VEC (basic_block, heap) *ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (basic_block, ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  VEC_safe_push (basic_block, heap, dom_bbs, bb);

      VEC_free (basic_block, heap, ldom);
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get an expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Compute the modulo by ANDing the iteration count with max_unroll; we
     use the fact that the number of unrollings is a power of two, and thus
     this is correct even if there is overflow in the computation.  */
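  /* For illustration: with max_unroll == 3 the unroll factor is 4, and
     niter & 3 == niter % 4 for any unsigned value, even one that wrapped
     around in the computation above.  */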
  niter = expand_simple_binop (desc->mode, AND,
			       niter,
			       GEN_INT (max_unroll),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  remove_edges = NULL;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of the loop body (almost always we must leave the
     exit test here; the only exception is when we have an extra zero check
     and the number of iterations is reliable).  Also record the place of
     the (possible) extra zero check.  */
  sbitmap_zero (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    SET_BIT (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      sbitmap_zero (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	SET_BIT (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create an item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL_RTX);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL_RTX);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  sbitmap_ones (wont_exit);
  RESET_BIT (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at the
     entry of the loop.  After passing through the above code, we see
     that the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 GEN_INT (max_unroll + 1));
  desc->niter_max /= max_unroll + 1;
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      desc->niter_max--;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));

  VEC_free (basic_block, heap, dom_bbs);
}

/* Decide whether to simply peel LOOP and how much.  */
static void
decide_peel_simple (struct loop *loop, int flags)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (!(flags & UAP_PEEL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);

  /* Skip big loops.  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Do not simply peel loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not peeling, contains branches\n");
      return;
    }

  if (loop->header->count)
    {
      unsigned niter = expected_loop_iterations (loop);
      if (niter + 1 > npeel)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
		       (HOST_WIDEST_INT) (niter + 1));
	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
		       npeel);
	    }
	  return;
	}
      npeel = niter + 1;
    }
  else
    {
      /* For now we have no good heuristics to decide whether loop peeling
         will be effective, so disable it.  */
      if (dump_file)
	fprintf (dump_file,
		 ";; Not peeling loop, no evidence it will be profitable\n");
      return;
    }

  /* Success.  */
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
  loop->lpt_decision.times = npeel;

  if (dump_file)
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
	     loop->lpt_decision.times);
}

/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   if (!cond) goto end;
   body;
   if (!cond) goto end;
   body;
   while (cond)
     body;
   end: ;
   */
static void
peel_loop_simple (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned npeel = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller && npeel > 1)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (npeel + 1);
  sbitmap_zero (wont_exit);

  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      npeel, wont_exit, NULL,
				      NULL, DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  free (wont_exit);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, npeel, false, false);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      if (desc->const_iter)
	{
	  desc->niter -= npeel;
	  desc->niter_expr = GEN_INT (desc->niter);
	  desc->noloop_assumptions = NULL_RTX;
	}
      else
	{
	  /* We cannot just update niter_expr, as its value might be clobbered
	     inside the loop.  We could handle this by counting the number into
	     a temporary just like we do in runtime unrolling, but it does not
	     seem worthwhile.  */
	  free_simple_loop_desc (loop);
	}
    }
  if (dump_file)
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count
      && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the loop stupidly, %d times.\n",
	     loop->lpt_decision.times);
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  sbitmap_zero (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial
	 assumptions for a loop to be really simple.  We could update the
	 counts, but the problem is that we are unable to decide which
	 exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* A hash function for information about insns to split.  */

static hashval_t
si_info_hash (const void *ivts)
{
  return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
}

/* An equality function for information about insns to split.  */

static int
si_info_eq (const void *ivts1, const void *ivts2)
{
  const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
  const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;

  return i1->insn == i2->insn;
}

/* Return a hash for VES, which is really a "var_to_expand *".  */

static hashval_t
ve_info_hash (const void *ves)
{
  return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
}

/* Return true if IVTS1 and IVTS2 (which are really both of type
   "var_to_expand *") refer to the same instruction.  */

static int
ve_info_eq (const void *ivts1, const void *ivts2)
{
  const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
  const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;

  return i1->insn == i2->insn;
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of an
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which initializes the
	 expansions with zero and sums them at the end of the computation,
	 will yield wrong results for (x = something - x); thus avoid
	 using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  ves->accum_pos = accum_pos;
  return ves;
}
1729 
1730 /* Determine whether there is an induction variable in INSN that
1731    we would like to split during unrolling.
1732 
1733    I.e. replace
1734 
1735    i = i + 1;
1736    ...
1737    i = i + 1;
1738    ...
1739    i = i + 1;
1740    ...
1741 
1742    chains of this type by
1743 
1744    i0 = i + 1
1745    ...
1746    i = i0 + 1
1747    ...
1748    i = i0 + 2
1749    ...
1750 
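   This breaks the serial dependence chain among the increments, so
   later passes can schedule the unrolled copies independently.
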
1751    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1752    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1753    pointer to it.  */
1754 
1755 static struct iv_to_split *
1756 analyze_iv_to_split_insn (rtx insn)
1757 {
1758   rtx set, dest;
1759   struct rtx_iv iv;
1760   struct iv_to_split *ivts;
1761   bool ok;
1762 
1763   /* For now we just split the basic induction variables.  Later this may be
1764      extended, for example by also selecting addresses of memory references.  */
1765   set = single_set (insn);
1766   if (!set)
1767     return NULL;
1768 
1769   dest = SET_DEST (set);
1770   if (!REG_P (dest))
1771     return NULL;
1772 
1773   if (!biv_p (insn, dest))
1774     return NULL;
1775 
1776   ok = iv_analyze_result (insn, dest, &iv);
1777 
1778   /* This used to be an assert under the assumption that if biv_p returns
1779      true, then iv_analyze_result must also return true.  However, that
1780      assumption is not strictly correct as evidenced by pr25569.
1781 
1782      Returning NULL when iv_analyze_result returns false is safe and
1783      avoids the problems in pr25569 until the iv_analyze_* routines
1784      can be fixed, which is apparently hard and time-consuming
1785      according to their author.  */
1786   if (! ok)
1787     return NULL;
1788 
1789   if (iv.step == const0_rtx
1790       || iv.mode != iv.extend_mode)
1791     return NULL;
1792 
1793   /* Record the insn to split.  */
1794   ivts = XNEW (struct iv_to_split);
1795   ivts->insn = insn;
1796   ivts->base_var = NULL_RTX;
1797   ivts->step = iv.step;
1798   ivts->next = NULL;
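  /* The IV value occurs as the SET_SRC of the single set in INSN,
     i.e. at location XEXP (set, 1).  */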
1799   ivts->n_loc = 1;
1800   ivts->loc[0] = 1;
1801 
1802   return ivts;
1803 }
1804 
1805 /* Determine which insns in LOOP can be optimized.
1806    Return an OPT_INFO struct with the relevant hash tables filled
1807    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1808    is undefined for the return value.  */
1809 
1810 static struct opt_info *
1811 analyze_insns_in_loop (struct loop *loop)
1812 {
1813   basic_block *body, bb;
1814   unsigned i;
1815   struct opt_info *opt_info = XCNEW (struct opt_info);
1816   rtx insn;
1817   struct iv_to_split *ivts = NULL;
1818   struct var_to_expand *ves = NULL;
1819   PTR *slot1;
1820   PTR *slot2;
1821   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1822   edge exit;
1823   bool can_apply = false;
1824 
1825   iv_analysis_loop_init (loop);
1826 
1827   body = get_loop_body (loop);
1828 
1829   if (flag_split_ivs_in_unroller)
1830     {
1831       opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1832 					      si_info_hash, si_info_eq, free);
1833       opt_info->iv_to_split_head = NULL;
1834       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1835     }
1836 
1837   /* Record the loop exit bb and loop preheader before the unrolling.  */
1838   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1839 
1840   if (VEC_length (edge, edges) == 1)
1841     {
1842       exit = VEC_index (edge, edges, 0);
1843       if (!(exit->flags & EDGE_COMPLEX))
1844 	{
1845 	  opt_info->loop_exit = split_edge (exit);
1846 	  can_apply = true;
1847 	}
1848     }
1849 
1850   if (flag_variable_expansion_in_unroller
1851       && can_apply)
1852     {
1853       opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1854 							ve_info_hash,
1855 							ve_info_eq, free);
1856       opt_info->var_to_expand_head = NULL;
1857       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1858     }
1859 
1860   for (i = 0; i < loop->num_nodes; i++)
1861     {
1862       bb = body[i];
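      /* Only process blocks that are executed on every iteration,
	 i.e. those that dominate the loop latch.  */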
1863       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1864 	continue;
1865 
1866       FOR_BB_INSNS (bb, insn)
1867       {
1868         if (!INSN_P (insn))
1869           continue;
1870 
1871         if (opt_info->insns_to_split)
1872           ivts = analyze_iv_to_split_insn (insn);
1873 
1874         if (ivts)
1875           {
1876             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1877 	    gcc_assert (*slot1 == NULL);
1878             *slot1 = ivts;
1879 	    *opt_info->iv_to_split_tail = ivts;
1880 	    opt_info->iv_to_split_tail = &ivts->next;
1881             continue;
1882           }
1883 
1884         if (opt_info->insns_with_var_to_expand)
1885           ves = analyze_insn_to_expand_var (loop, insn);
1886 
1887         if (ves)
1888           {
1889             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1890 	    gcc_assert (*slot2 == NULL);
1891             *slot2 = ves;
1892 	    *opt_info->var_to_expand_tail = ves;
1893 	    opt_info->var_to_expand_tail = &ves->next;
1894           }
1895       }
1896     }
1897 
1898   VEC_free (edge, heap, edges);
1899   free (body);
1900   return opt_info;
1901 }
1902 
1903 /* Called just before loop duplication.  Records the start of the
1904    duplicated area in OPT_INFO.  */
1905 
1906 static void
1907 opt_info_start_duplication (struct opt_info *opt_info)
1908 {
1909   if (opt_info)
1910     opt_info->first_new_block = last_basic_block;
1911 }
1912 
1913 /* Determine the number of iterations between initialization of the base
1914    variable and the current copy (N_COPY).  N_COPIES is the total number
1915    of newly created copies.  UNROLLING is true if we are unrolling
1916    (not peeling) the loop.  */
1917 
1918 static unsigned
1919 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1920 {
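  /* An illustrative example (not from the sources): with N_COPIES == 2,
     unrolling gives delta == N_COPY for copies 0, 1 and 2, while peeling
     maps copy 1 to delta 0, copy 2 to delta 1, and the original body
     (copy 0) to delta 2 == N_COPIES.  */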
1921   if (unrolling)
1922     {
1923       /* If we are unrolling, initialization is done in the original loop
1924 	 body (number 0).  */
1925       return n_copy;
1926     }
1927   else
1928     {
1929       /* If we are peeling, the copy in which the initialization occurs has
1930 	 number 1.  The original loop (number 0) comes last.  */
1931       if (n_copy)
1932 	return n_copy - 1;
1933       else
1934 	return n_copies;
1935     }
1936 }
1937 
1938 /* Locate in EXPR the expression corresponding to the location recorded
1939    in IVTS, and return a pointer to the RTX for this location.  */
1940 
1941 static rtx *
1942 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1943 {
1944   unsigned i;
1945   rtx *ret = &expr;
1946 
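  /* E.g. for the location recorded by analyze_iv_to_split_insn
     (n_loc == 1, loc[0] == 1), this walk yields &XEXP (expr, 1),
     i.e. the address of the SET_SRC of the insn's single set.  */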
1947   for (i = 0; i < ivts->n_loc; i++)
1948     ret = &XEXP (*ret, ivts->loc[i]);
1949 
1950   return ret;
1951 }
1952 
1953 /* Allocate basic variable for the induction variable chain.  */
1954 
1955 static void
1956 allocate_basic_variable (struct iv_to_split *ivts)
1957 {
1958   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1959 
1960   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1961 }
1962 
1963 /* Insert initialization of basic variable of IVTS before INSN, taking
1964    the initial value from INSN.  */
1965 
1966 static void
1967 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1968 {
1969   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1970   rtx seq;
1971 
1972   start_sequence ();
1973   expr = force_operand (expr, ivts->base_var);
1974   if (expr != ivts->base_var)
1975     emit_move_insn (ivts->base_var, expr);
1976   seq = get_insns ();
1977   end_sequence ();
1978 
1979   emit_insn_before (seq, insn);
1980 }
1981 
1982 /* Replace the use of induction variable described in IVTS in INSN
1983    by base variable + DELTA * step.  */
1984 
1985 static void
1986 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1987 {
1988   rtx expr, *loc, seq, incr, var;
1989   enum machine_mode mode = GET_MODE (ivts->base_var);
1990   rtx src, dest, set;
1991 
1992   /* Construct base + DELTA * step.  */
1993   if (!delta)
1994     expr = ivts->base_var;
1995   else
1996     {
1997       incr = simplify_gen_binary (MULT, mode,
1998 				  ivts->step, gen_int_mode (delta, mode));
1999       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
2000 				  ivts->base_var, incr);
2001     }
2002 
2003   /* Figure out where to do the replacement.  */
2004   loc = get_ivts_expr (single_set (insn), ivts);
2005 
2006   /* If we can make the replacement right away, we're done.  */
2007   if (validate_change (insn, loc, expr, 0))
2008     return;
2009 
2010   /* Otherwise, force EXPR into a register and try again.  */
2011   start_sequence ();
2012   var = gen_reg_rtx (mode);
2013   expr = force_operand (expr, var);
2014   if (expr != var)
2015     emit_move_insn (var, expr);
2016   seq = get_insns ();
2017   end_sequence ();
2018   emit_insn_before (seq, insn);
2019 
2020   if (validate_change (insn, loc, var, 0))
2021     return;
2022 
2023   /* The last chance.  Try recreating the assignment in insn
2024      completely from scratch.  */
2025   set = single_set (insn);
2026   gcc_assert (set);
2027 
2028   start_sequence ();
2029   *loc = var;
2030   src = copy_rtx (SET_SRC (set));
2031   dest = copy_rtx (SET_DEST (set));
2032   src = force_operand (src, dest);
2033   if (src != dest)
2034     emit_move_insn (dest, src);
2035   seq = get_insns ();
2036   end_sequence ();
2037 
2038   emit_insn_before (seq, insn);
2039   delete_insn (insn);
2040 }
2041 
2042 
2043 /* Return one expansion of the accumulator recorded in VE.  */
2044 
2045 static rtx
2046 get_expansion (struct var_to_expand *ve)
2047 {
2048   rtx reg;
2049 
2050   if (ve->reuse_expansion == 0)
2051     reg = ve->reg;
2052   else
2053     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
2054 
2055   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
2056     ve->reuse_expansion = 0;
2057   else
2058     ve->reuse_expansion++;
2059 
2060   return reg;
2061 }
2062 
2063 
2064 /* Given INSN replace the uses of the accumulator recorded in VE
2065    with a new register.  */
2066 
2067 static void
2068 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
2069 {
2070   rtx new_reg, set;
2071   bool really_new_expansion = false;
2072 
2073   set = single_set (insn);
2074   gcc_assert (set);
2075 
2076   /* Generate a new register only if the expansion limit has not been
2077      reached.  Otherwise reuse an already existing expansion.  */
2078   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
2079     {
2080       really_new_expansion = true;
2081       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2082     }
2083   else
2084     new_reg = get_expansion (ve);
2085 
2086   validate_change (insn, &SET_DEST (set), new_reg, 1);
2087   validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2088 
2089   if (apply_change_group ())
2090     if (really_new_expansion)
2091       {
2092         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2093         ve->expansion_count++;
2094       }
2095 }
2096 
2097 /* Initialize the variable expansions in the loop preheader.  PLACE is the
2098    loop-preheader basic block where the initialization of the
2099    expansions should take place.  The expansions are initialized with
2100    (-0) when the operation is plus or minus, to honor signed zero.  This
2101    way we prevent cases where the sign of the final result is
2102    affected by the sign of the expansion.  Here is an example to
2103    demonstrate this:
2104 
2105    for (i = 0 ; i < n; i++)
2106      sum += something;
2107 
2108    ==>
2109 
2110    sum += something
2111    ....
2112    i = i+1;
2113    sum1 += something
2114    ....
2115    i = i+1
2116    sum2 += something;
2117    ....
2118 
2119    When SUM is initialized with -zero and SOMETHING is also -zero, the
2120    final result of sum should be -zero; thus the expansions sum1 and sum2
2121    should be initialized with -zero as well (otherwise we will get +zero
2122    as the final result).  */
2123 
2124 static void
2125 insert_var_expansion_initialization (struct var_to_expand *ve,
2126 				     basic_block place)
2127 {
2128   rtx seq, var, zero_init, insn;
2129   unsigned i;
2130   enum machine_mode mode = GET_MODE (ve->reg);
2131   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2132 
2133   if (VEC_length (rtx, ve->var_expansions) == 0)
2134     return;
2135 
2136   start_sequence ();
2137   switch (ve->op)
2138     {
2139     case FMA:
2140       /* Note that we only accumulate FMA via the ADD operand.  */
2141     case PLUS:
2142     case MINUS:
2143       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2144         {
2145 	  if (honor_signed_zero_p)
2146 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2147 	  else
2148 	    zero_init = CONST0_RTX (mode);
2149           emit_move_insn (var, zero_init);
2150         }
2151       break;
2152 
2153     case MULT:
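      /* For a product the neutral initializer is 1, not 0; the
	 ZERO_INIT variable name is simply reused from the additive
	 case.  */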
2154       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2155         {
2156           zero_init = CONST1_RTX (GET_MODE (var));
2157           emit_move_insn (var, zero_init);
2158         }
2159       break;
2160 
2161     default:
2162       gcc_unreachable ();
2163     }
2164 
2165   seq = get_insns ();
2166   end_sequence ();
2167 
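  /* Emit the initializers right after the NOTE_INSN_BASIC_BLOCK note
     at the start of PLACE.  */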
2168   insn = BB_HEAD (place);
2169   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2170     insn = NEXT_INSN (insn);
2171 
2172   emit_insn_after (seq, insn);
2173 }
2174 
2175 /* Combine the variable expansions at the loop exit.  PLACE is the
2176    loop exit basic block where the summation of the expansions should
2177    take place.  */
2178 
2179 static void
2180 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2181 {
2182   rtx sum = ve->reg;
2183   rtx expr, seq, var, insn;
2184   unsigned i;
2185 
2186   if (VEC_length (rtx, ve->var_expansions) == 0)
2187     return;
2188 
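  /* Fold every recorded expansion into SUM, e.g. producing
     sum = sum2 + (sum1 + sum) for the PLUS example in the comment
     before insert_var_expansion_initialization.  */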
2189   start_sequence ();
2190   switch (ve->op)
2191     {
2192     case FMA:
2193       /* Note that we only accumulate FMA via the ADD operand.  */
2194     case PLUS:
2195     case MINUS:
2196       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2197 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
2198       break;
2199 
2200     case MULT:
2201       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2202 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
2203       break;
2204 
2205     default:
2206       gcc_unreachable ();
2207     }
2208 
2209   expr = force_operand (sum, ve->reg);
2210   if (expr != ve->reg)
2211     emit_move_insn (ve->reg, expr);
2212   seq = get_insns ();
2213   end_sequence ();
2214 
2215   insn = BB_HEAD (place);
2216   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2217     insn = NEXT_INSN (insn);
2218 
2219   emit_insn_after (seq, insn);
2220 }
2221 
2222 /* Apply loop optimizations in loop copies using the
2223    data gathered during the unrolling.  Structure
2224    OPT_INFO records that data.
2225 
2226    UNROLLING is true if we unrolled (not peeled) the loop.
2227    REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
2228    of the loop (as should happen in complete unrolling, but not in ordinary
2229    peeling of the loop).  */
2230 
2231 static void
2232 apply_opt_in_copies (struct opt_info *opt_info,
2233                      unsigned n_copies, bool unrolling,
2234                      bool rewrite_original_loop)
2235 {
2236   unsigned i, delta;
2237   basic_block bb, orig_bb;
2238   rtx insn, orig_insn, next;
2239   struct iv_to_split ivts_templ, *ivts;
2240   struct var_to_expand ve_templ, *ves;
2241 
2242   /* Sanity check -- we need to put initialization in the original loop
2243      body.  */
2244   gcc_assert (!unrolling || rewrite_original_loop);
2245 
2246   /* Allocate the basic variables (i0).  */
2247   if (opt_info->insns_to_split)
2248     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2249       allocate_basic_variable (ivts);
2250 
2251   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2252     {
2253       bb = BASIC_BLOCK (i);
2254       orig_bb = get_bb_original (bb);
2255 
2256       /* bb->aux holds the position in the copy sequence, as initialized
2257 	 by duplicate_loop_to_header_edge.  */
2258       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2259 					unrolling);
2260       bb->aux = 0;
2261       orig_insn = BB_HEAD (orig_bb);
2262       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2263         {
2264           next = NEXT_INSN (insn);
2265 	  if (!INSN_P (insn)
2266 	      || (DEBUG_INSN_P (insn)
2267 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2268             continue;
2269 
2270 	  while (!INSN_P (orig_insn)
2271 		 || (DEBUG_INSN_P (orig_insn)
2272 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2273 			 == LABEL_DECL)))
2274             orig_insn = NEXT_INSN (orig_insn);
2275 
2276           ivts_templ.insn = orig_insn;
2277           ve_templ.insn = orig_insn;
2278 
2279           /* Apply splitting iv optimization.  */
2280           if (opt_info->insns_to_split)
2281             {
2282               ivts = (struct iv_to_split *)
2283 		htab_find (opt_info->insns_to_split, &ivts_templ);
2284 
2285               if (ivts)
2286                 {
2287 		  gcc_assert (GET_CODE (PATTERN (insn))
2288 			      == GET_CODE (PATTERN (orig_insn)));
2289 
2290                   if (!delta)
2291                     insert_base_initialization (ivts, insn);
2292                   split_iv (ivts, insn, delta);
2293                 }
2294             }
2295           /* Apply variable expansion optimization.  */
2296           if (unrolling && opt_info->insns_with_var_to_expand)
2297             {
2298               ves = (struct var_to_expand *)
2299 		htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2300               if (ves)
2301                 {
2302 		  gcc_assert (GET_CODE (PATTERN (insn))
2303 			      == GET_CODE (PATTERN (orig_insn)));
2304                   expand_var_during_unrolling (ves, insn);
2305                 }
2306             }
2307           orig_insn = NEXT_INSN (orig_insn);
2308         }
2309     }
2310 
2311   if (!rewrite_original_loop)
2312     return;
2313 
2314   /* Initialize the variable expansions in the loop preheader
2315      and take care of combining them at the loop exit.  */
2316   if (opt_info->insns_with_var_to_expand)
2317     {
2318       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2319 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2320       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2321 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2322     }
2323 
2324   /* Also rewrite the original loop body.  Find its blocks as the originals
2325      of the blocks in the last copied iteration, i.e. those that have
2326      get_bb_copy (get_bb_original (bb)) == bb.  */
2327   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2328     {
2329       bb = BASIC_BLOCK (i);
2330       orig_bb = get_bb_original (bb);
2331       if (get_bb_copy (orig_bb) != bb)
2332 	continue;
2333 
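      /* The original body is copy number 0; see determine_split_iv_delta.  */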
2334       delta = determine_split_iv_delta (0, n_copies, unrolling);
2335       for (orig_insn = BB_HEAD (orig_bb);
2336            orig_insn != NEXT_INSN (BB_END (bb));
2337            orig_insn = next)
2338         {
2339           next = NEXT_INSN (orig_insn);
2340 
2341           if (!INSN_P (orig_insn))
2342 	    continue;
2343 
2344           ivts_templ.insn = orig_insn;
2345           if (opt_info->insns_to_split)
2346             {
2347               ivts = (struct iv_to_split *)
2348 		htab_find (opt_info->insns_to_split, &ivts_templ);
2349               if (ivts)
2350                 {
2351                   if (!delta)
2352                     insert_base_initialization (ivts, orig_insn);
2353                   split_iv (ivts, orig_insn, delta);
2354                   continue;
2355                 }
2356             }
2357 
2358         }
2359     }
2360 }
2361 
2362 /* Release OPT_INFO.  */
2363 
2364 static void
2365 free_opt_info (struct opt_info *opt_info)
2366 {
2367   if (opt_info->insns_to_split)
2368     htab_delete (opt_info->insns_to_split);
2369   if (opt_info->insns_with_var_to_expand)
2370     {
2371       struct var_to_expand *ves;
2372 
2373       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2374 	VEC_free (rtx, heap, ves->var_expansions);
2375       htab_delete (opt_info->insns_with_var_to_expand);
2376     }
2377   free (opt_info);
2378 }
2379