110d565efSmrg /* Loop unrolling.
2*ec02198aSmrg    Copyright (C) 2002-2020 Free Software Foundation, Inc.
310d565efSmrg 
410d565efSmrg This file is part of GCC.
510d565efSmrg 
610d565efSmrg GCC is free software; you can redistribute it and/or modify it under
710d565efSmrg the terms of the GNU General Public License as published by the Free
810d565efSmrg Software Foundation; either version 3, or (at your option) any later
910d565efSmrg version.
1010d565efSmrg 
1110d565efSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1210d565efSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
1310d565efSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1410d565efSmrg for more details.
1510d565efSmrg 
1610d565efSmrg You should have received a copy of the GNU General Public License
1710d565efSmrg along with GCC; see the file COPYING3.  If not see
1810d565efSmrg <http://www.gnu.org/licenses/>.  */
1910d565efSmrg 
2010d565efSmrg #include "config.h"
2110d565efSmrg #include "system.h"
2210d565efSmrg #include "coretypes.h"
2310d565efSmrg #include "backend.h"
2410d565efSmrg #include "target.h"
2510d565efSmrg #include "rtl.h"
2610d565efSmrg #include "tree.h"
2710d565efSmrg #include "cfghooks.h"
2810d565efSmrg #include "memmodel.h"
2910d565efSmrg #include "optabs.h"
3010d565efSmrg #include "emit-rtl.h"
3110d565efSmrg #include "recog.h"
3210d565efSmrg #include "profile.h"
3310d565efSmrg #include "cfgrtl.h"
3410d565efSmrg #include "cfgloop.h"
3510d565efSmrg #include "dojump.h"
3610d565efSmrg #include "expr.h"
3710d565efSmrg #include "dumpfile.h"
3810d565efSmrg 
3910d565efSmrg /* This pass performs loop unrolling.  We only perform this
4010d565efSmrg    optimization on innermost loops (with single exception) because
4110d565efSmrg    the impact on performance is greatest here, and we want to avoid
4210d565efSmrg    unnecessary code size growth.  The gain is caused by greater sequentiality
4310d565efSmrg    of code, better code to optimize for further passes and in some cases
4410d565efSmrg    by fewer testings of exit conditions.  The main problem is code growth,
4510d565efSmrg    that impacts performance negatively due to effect of caches.
4610d565efSmrg 
4710d565efSmrg    What we do:
4810d565efSmrg 
   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations.
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code.
5710d565efSmrg    For more detailed descriptions of each of those, see comments at
5810d565efSmrg    appropriate function below.
5910d565efSmrg 
   There are a number of parameters (defined and described in params.def)
   that control how much we unroll.
6210d565efSmrg 
   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several
   percent.  */
6710d565efSmrg 
/* Information about a single occurrence of an induction variable that
   is to be split during unrolling.  Entries are chained in walking
   order through NEXT.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};
7910d565efSmrg 
/* Information about an accumulator to expand into several copies during
   unrolling, so the copies of the loop body can accumulate in parallel.
   Entries are chained in walking order through NEXT.  */

struct var_to_expand
{
  rtx_insn *insn;	           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
};
9610d565efSmrg 
/* Hashtable helper for iv_to_split; entries are identified by the insn
   the induction variable occurs in.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};
10410d565efSmrg 
10510d565efSmrg 
10610d565efSmrg /* A hash function for information about insns to split.  */
10710d565efSmrg 
10810d565efSmrg inline hashval_t
hash(const iv_to_split * ivts)10910d565efSmrg iv_split_hasher::hash (const iv_to_split *ivts)
11010d565efSmrg {
11110d565efSmrg   return (hashval_t) INSN_UID (ivts->insn);
11210d565efSmrg }
11310d565efSmrg 
11410d565efSmrg /* An equality functions for information about insns to split.  */
11510d565efSmrg 
11610d565efSmrg inline bool
equal(const iv_to_split * i1,const iv_to_split * i2)11710d565efSmrg iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
11810d565efSmrg {
11910d565efSmrg   return i1->insn == i2->insn;
12010d565efSmrg }
12110d565efSmrg 
/* Hashtable helper for var_to_expand; entries are identified by the insn
   the variable expansion occurs in.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};
12910d565efSmrg 
13010d565efSmrg /* Return a hash for VES.  */
13110d565efSmrg 
13210d565efSmrg inline hashval_t
hash(const var_to_expand * ves)13310d565efSmrg var_expand_hasher::hash (const var_to_expand *ves)
13410d565efSmrg {
13510d565efSmrg   return (hashval_t) INSN_UID (ves->insn);
13610d565efSmrg }
13710d565efSmrg 
13810d565efSmrg /* Return true if I1 and I2 refer to the same instruction.  */
13910d565efSmrg 
14010d565efSmrg inline bool
equal(const var_to_expand * i1,const var_to_expand * i2)14110d565efSmrg var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
14210d565efSmrg {
14310d565efSmrg   return i1->insn == i2->insn;
14410d565efSmrg }
14510d565efSmrg 
/* Information about the optimizations (IV splitting and accumulator
   expansion) to be applied in the unrolled loop.  The hash tables allow
   lookup by insn; the head/tail lists preserve walking order.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};
16410d565efSmrg 
/* Forward declarations for the local worker routines defined below.  */

static void decide_unroll_stupid (class loop *, int);
static void decide_unroll_constant_iterations (class loop *, int);
static void decide_unroll_runtime_iterations (class loop *, int);
static void unroll_loop_stupid (class loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (class loop *);
static void unroll_loop_runtime_iterations (class loop *);
static struct opt_info *analyze_insns_in_loop (class loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);
18510d565efSmrg 
18610d565efSmrg /* Emit a message summarizing the unroll that will be
18710d565efSmrg    performed for LOOP, along with the loop's location LOCUS, if
18810d565efSmrg    appropriate given the dump or -fopt-info settings.  */
18910d565efSmrg 
19010d565efSmrg static void
report_unroll(class loop * loop,dump_location_t locus)191*ec02198aSmrg report_unroll (class loop *loop, dump_location_t locus)
19210d565efSmrg {
193c7a68eb7Smrg   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
19410d565efSmrg 
19510d565efSmrg   if (loop->lpt_decision.decision == LPT_NONE)
19610d565efSmrg     return;
19710d565efSmrg 
19810d565efSmrg   if (!dump_enabled_p ())
19910d565efSmrg     return;
20010d565efSmrg 
2010fc04c29Smrg   dump_metadata_t metadata (report_flags, locus.get_impl_location ());
2020fc04c29Smrg   dump_printf_loc (metadata, locus.get_user_location (),
20310d565efSmrg                    "loop unrolled %d times",
20410d565efSmrg                    loop->lpt_decision.times);
205c7a68eb7Smrg   if (profile_info && loop->header->count.initialized_p ())
2060fc04c29Smrg     dump_printf (metadata,
20710d565efSmrg                  " (header execution count %d)",
208c7a68eb7Smrg                  (int)loop->header->count.to_gcov_type ());
20910d565efSmrg 
2100fc04c29Smrg   dump_printf (metadata, "\n");
21110d565efSmrg }
21210d565efSmrg 
/* Decide whether to unroll loops and how much.  FLAGS is a mask of UAP_*
   bits consulted by the decide_unroll_* subroutines.  For each innermost
   loop this records a decision in loop->lpt_decision.  */
static void
decide_unrolling (int flags)
{
  class loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      /* loop->unroll == 1 records an explicit request not to unroll.  */
      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      /* Cache insn counts used by the cost computations below.  */
      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
27610d565efSmrg 
27710d565efSmrg /* Unroll LOOPS.  */
27810d565efSmrg void
unroll_loops(int flags)27910d565efSmrg unroll_loops (int flags)
28010d565efSmrg {
281*ec02198aSmrg   class loop *loop;
28210d565efSmrg   bool changed = false;
28310d565efSmrg 
28410d565efSmrg   /* Now decide rest of unrolling.  */
28510d565efSmrg   decide_unrolling (flags);
28610d565efSmrg 
28710d565efSmrg   /* Scan the loops, inner ones first.  */
28810d565efSmrg   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
28910d565efSmrg     {
29010d565efSmrg       /* And perform the appropriate transformations.  */
29110d565efSmrg       switch (loop->lpt_decision.decision)
29210d565efSmrg 	{
29310d565efSmrg 	case LPT_UNROLL_CONSTANT:
29410d565efSmrg 	  unroll_loop_constant_iterations (loop);
29510d565efSmrg 	  changed = true;
29610d565efSmrg 	  break;
29710d565efSmrg 	case LPT_UNROLL_RUNTIME:
29810d565efSmrg 	  unroll_loop_runtime_iterations (loop);
29910d565efSmrg 	  changed = true;
30010d565efSmrg 	  break;
30110d565efSmrg 	case LPT_UNROLL_STUPID:
30210d565efSmrg 	  unroll_loop_stupid (loop);
30310d565efSmrg 	  changed = true;
30410d565efSmrg 	  break;
30510d565efSmrg 	case LPT_NONE:
30610d565efSmrg 	  break;
30710d565efSmrg 	default:
30810d565efSmrg 	  gcc_unreachable ();
30910d565efSmrg 	}
31010d565efSmrg     }
31110d565efSmrg 
31210d565efSmrg     if (changed)
31310d565efSmrg       {
31410d565efSmrg 	calculate_dominance_info (CDI_DOMINATORS);
31510d565efSmrg 	fix_loop_structure (NULL);
31610d565efSmrg       }
31710d565efSmrg 
31810d565efSmrg   iv_analysis_done ();
31910d565efSmrg }
32010d565efSmrg 
32110d565efSmrg /* Check whether exit of the LOOP is at the end of loop body.  */
32210d565efSmrg 
32310d565efSmrg static bool
loop_exit_at_end_p(class loop * loop)324*ec02198aSmrg loop_exit_at_end_p (class loop *loop)
32510d565efSmrg {
326*ec02198aSmrg   class niter_desc *desc = get_simple_loop_desc (loop);
32710d565efSmrg   rtx_insn *insn;
32810d565efSmrg 
32910d565efSmrg   /* We should never have conditional in latch block.  */
33010d565efSmrg   gcc_assert (desc->in_edge->dest != loop->header);
33110d565efSmrg 
33210d565efSmrg   if (desc->in_edge->dest != loop->latch)
33310d565efSmrg     return false;
33410d565efSmrg 
33510d565efSmrg   /* Check that the latch is empty.  */
33610d565efSmrg   FOR_BB_INSNS (loop->latch, insn)
33710d565efSmrg     {
33810d565efSmrg       if (INSN_P (insn) && active_insn_p (insn))
33910d565efSmrg 	return false;
34010d565efSmrg     }
34110d565efSmrg 
34210d565efSmrg   return true;
34310d565efSmrg }
34410d565efSmrg 
/* Decide whether to unroll LOOP iterating constant number of times
   and how much.  FLAGS is the UAP_* mask; without UAP_UNROLL, only
   loops with an explicit user request (loop->unroll) are considered.
   On success records LPT_UNROLL_CONSTANT in loop->lpt_decision.  */

static void
decide_unroll_constant_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return back silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).
     Bound it by both the total and the average insn-count parameters.  */
  nunroll = param_max_unrolled_insns / loop->ninsns;
  nunroll_by_av
    = param_max_average_unrolled_insns / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) param_max_unroll_times)
    nunroll = param_max_unroll_times;

  /* Let the target adjust the computed unroll factor.  */
  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 constant number of iterations; it should have been peeled instead.  */
      if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in the case the loop has more
     than one exit it may well loop less than determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;

  /* For each candidate factor I + 1, count the total number of body
     copies needed, including the peeled prologue/epilogue copies, and
     remember the cheapest.  */
  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
46110d565efSmrg 
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }

   The niter % (times + 1) leftover iterations are peeled in front of
   (or, when the exit test is at the end of the body, behind) the
   unrolled loop, and the loop's iteration bounds are updated to match.  */
static void
unroll_loop_constant_iterations (class loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  /* wont_exit[i] says that copy I cannot leave the loop; assume all
     copies cannot until proven otherwise below.  */
  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
	 in the first copy, so that the loops that start with test
	 of exit condition have continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 1)
 	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  /* Account for the peeled iterations in the loop bounds and
	     estimates; drop estimates that would go negative.  */
	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
	 the loop tests the condition at the end of loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
	 case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

          opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 0)
  	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  /* Account for the peeled iterations in the loop bounds and
	     estimates; drop estimates that would go negative.  */
	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* The unrolled loop executes the original iterations in groups of
     max_unroll + 1, so scale all iteration counts accordingly.  */
  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
66510d565efSmrg 
66610d565efSmrg /* Decide whether to unroll LOOP iterating runtime computable number of times
66710d565efSmrg    and how much.  */
66810d565efSmrg static void
decide_unroll_runtime_iterations(class loop * loop,int flags)669*ec02198aSmrg decide_unroll_runtime_iterations (class loop *loop, int flags)
67010d565efSmrg {
67110d565efSmrg   unsigned nunroll, nunroll_by_av, i;
672*ec02198aSmrg   class niter_desc *desc;
67310d565efSmrg   widest_int iterations;
67410d565efSmrg 
675c7a68eb7Smrg   /* If we were not asked to unroll this loop, just return back silently.  */
676c7a68eb7Smrg   if (!(flags & UAP_UNROLL) && !loop->unroll)
67710d565efSmrg     return;
67810d565efSmrg 
679c7a68eb7Smrg   if (dump_enabled_p ())
680c7a68eb7Smrg     dump_printf (MSG_NOTE,
681c7a68eb7Smrg 		 "considering unrolling loop with runtime-"
68210d565efSmrg 		 "computable number of iterations\n");
68310d565efSmrg 
68410d565efSmrg   /* nunroll = total number of copies of the original loop body in
68510d565efSmrg      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
686*ec02198aSmrg   nunroll = param_max_unrolled_insns / loop->ninsns;
687*ec02198aSmrg   nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
68810d565efSmrg   if (nunroll > nunroll_by_av)
68910d565efSmrg     nunroll = nunroll_by_av;
690*ec02198aSmrg   if (nunroll > (unsigned) param_max_unroll_times)
691*ec02198aSmrg     nunroll = param_max_unroll_times;
69210d565efSmrg 
69310d565efSmrg   if (targetm.loop_unroll_adjust)
69410d565efSmrg     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
69510d565efSmrg 
696c7a68eb7Smrg   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
697c7a68eb7Smrg     nunroll = loop->unroll;
698c7a68eb7Smrg 
69910d565efSmrg   /* Skip big loops.  */
70010d565efSmrg   if (nunroll <= 1)
70110d565efSmrg     {
70210d565efSmrg       if (dump_file)
70310d565efSmrg 	fprintf (dump_file, ";; Not considering loop, is too big\n");
70410d565efSmrg       return;
70510d565efSmrg     }
70610d565efSmrg 
70710d565efSmrg   /* Check for simple loops.  */
70810d565efSmrg   desc = get_simple_loop_desc (loop);
70910d565efSmrg 
71010d565efSmrg   /* Check simpleness.  */
71110d565efSmrg   if (!desc->simple_p || desc->assumptions)
71210d565efSmrg     {
71310d565efSmrg       if (dump_file)
71410d565efSmrg 	fprintf (dump_file,
71510d565efSmrg 		 ";; Unable to prove that the number of iterations "
71610d565efSmrg 		 "can be counted in runtime\n");
71710d565efSmrg       return;
71810d565efSmrg     }
71910d565efSmrg 
72010d565efSmrg   if (desc->const_iter)
72110d565efSmrg     {
72210d565efSmrg       if (dump_file)
72310d565efSmrg 	fprintf (dump_file, ";; Loop iterates constant times\n");
72410d565efSmrg       return;
72510d565efSmrg     }
72610d565efSmrg 
72710d565efSmrg   /* Check whether the loop rolls.  */
72810d565efSmrg   if ((get_estimated_loop_iterations (loop, &iterations)
72910d565efSmrg        || get_likely_max_loop_iterations (loop, &iterations))
73010d565efSmrg       && wi::ltu_p (iterations, 2 * nunroll))
73110d565efSmrg     {
73210d565efSmrg       if (dump_file)
73310d565efSmrg 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
73410d565efSmrg       return;
73510d565efSmrg     }
73610d565efSmrg 
737c7a68eb7Smrg   /* Success; now force nunroll to be power of 2, as code-gen
738c7a68eb7Smrg      requires it, we are unable to cope with overflows in
739c7a68eb7Smrg      computation of number of iterations.  */
74010d565efSmrg   for (i = 1; 2 * i <= nunroll; i *= 2)
74110d565efSmrg     continue;
74210d565efSmrg 
74310d565efSmrg   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
74410d565efSmrg   loop->lpt_decision.times = i - 1;
74510d565efSmrg }
74610d565efSmrg 
74710d565efSmrg /* Splits edge E and inserts the sequence of instructions INSNS on it, and
74810d565efSmrg    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
74910d565efSmrg    and NULL is returned instead.  */
75010d565efSmrg 
75110d565efSmrg basic_block
split_edge_and_insert(edge e,rtx_insn * insns)75210d565efSmrg split_edge_and_insert (edge e, rtx_insn *insns)
75310d565efSmrg {
75410d565efSmrg   basic_block bb;
75510d565efSmrg 
75610d565efSmrg   if (!insns)
75710d565efSmrg     return NULL;
75810d565efSmrg   bb = split_edge (e);
75910d565efSmrg   emit_insn_after (insns, BB_END (bb));
76010d565efSmrg 
76110d565efSmrg   /* ??? We used to assume that INSNS can contain control flow insns, and
76210d565efSmrg      that we had to try to find sub basic blocks in BB to maintain a valid
76310d565efSmrg      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
76410d565efSmrg      and call break_superblocks when going out of cfglayout mode.  But it
76510d565efSmrg      turns out that this never happens; and that if it does ever happen,
76610d565efSmrg      the verify_flow_info at the end of the RTL loop passes would fail.
76710d565efSmrg 
76810d565efSmrg      There are two reasons why we expected we could have control flow insns
76910d565efSmrg      in INSNS.  The first is when a comparison has to be done in parts, and
77010d565efSmrg      the second is when the number of iterations is computed for loops with
77110d565efSmrg      the number of iterations known at runtime.  In both cases, test cases
77210d565efSmrg      to get control flow in INSNS appear to be impossible to construct:
77310d565efSmrg 
77410d565efSmrg       * If do_compare_rtx_and_jump needs several branches to do comparison
77510d565efSmrg 	in a mode that needs comparison by parts, we cannot analyze the
77610d565efSmrg 	number of iterations of the loop, and we never get to unrolling it.
77710d565efSmrg 
77810d565efSmrg       * The code in expand_divmod that was suspected to cause creation of
77910d565efSmrg 	branching code seems to be only accessed for signed division.  The
78010d565efSmrg 	divisions used by # of iterations analysis are always unsigned.
78110d565efSmrg 	Problems might arise on architectures that emits branching code
78210d565efSmrg 	for some operations that may appear in the unroller (especially
78310d565efSmrg 	for division), but we have no such architectures.
78410d565efSmrg 
78510d565efSmrg      Considering all this, it was decided that we should for now assume
78610d565efSmrg      that INSNS can in theory contain control flow insns, but in practice
78710d565efSmrg      it never does.  So we don't handle the theoretical case, and should
78810d565efSmrg      a real failure ever show up, we have a pretty good clue for how to
78910d565efSmrg      fix it.  */
79010d565efSmrg 
79110d565efSmrg   return bb;
79210d565efSmrg }
79310d565efSmrg 
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  Returns the emitted insn sequence; the caller
   is expected to insert it into the CFG (e.g. via split_edge_and_insert).  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  /* One of the operands may lack a mode (e.g. a constant); take the mode
     from whichever operand has one.  */
  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way how to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      /* Verify CINSN really performs the comparison we were asked for
	 before reusing its pattern.  */
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      /* Duplicate CINSN's pattern, then retarget the copy at LABEL.  */
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      /* CINSN is only meaningful for MODE_CC comparisons.  */
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      /* do_compare_rtx_and_jump emitted the jump; record LABEL as its
	 target explicitly and bump the label's use count.  */
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  /* Attach the branch probability note only when PROB is known.  */
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
84910d565efSmrg 
/* Unroll LOOP for which we are able to count number of iterations in
   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (class loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Collect induction-variable-split / accumulator-expansion opportunities
     if those optimizations are enabled.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at end and whose number of iterations is reliable,
     add one to niter to account for first pass through loop body before
     reaching exit test. */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of loop body.  Leave the exit test if the number
	 of iterations is not reliable.  Also record the place of the extra zero
	 check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize
     innermost switch block.  Switch blocks and peeled loop copies are built
     from innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  J is the remainder value (NITER holds
	 niter % (max_unroll + 1) after the AND above) for which this
	 peeled copy is the entry point.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      /* Probability of entering here, assuming the remaining remainder
	 values are equally likely -- TODO confirm this modeling.  */
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  Only the copy indexed by MAY_EXIT_COPY keeps its
     exit test; all other exit tests are removed.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      /* The exit-at-end case counted one extra pass (the PLUS const1_rtx
	 above); compensate by subtracting one from the new niter and the
	 recorded bounds, invalidating bounds that would go negative.  */
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
115110d565efSmrg 
115210d565efSmrg /* Decide whether to unroll LOOP stupidly and how much.  */
115310d565efSmrg static void
decide_unroll_stupid(class loop * loop,int flags)1154*ec02198aSmrg decide_unroll_stupid (class loop *loop, int flags)
115510d565efSmrg {
115610d565efSmrg   unsigned nunroll, nunroll_by_av, i;
1157*ec02198aSmrg   class niter_desc *desc;
115810d565efSmrg   widest_int iterations;
115910d565efSmrg 
1160c7a68eb7Smrg   /* If we were not asked to unroll this loop, just return back silently.  */
1161c7a68eb7Smrg   if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
116210d565efSmrg     return;
116310d565efSmrg 
1164c7a68eb7Smrg   if (dump_enabled_p ())
1165c7a68eb7Smrg     dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
116610d565efSmrg 
116710d565efSmrg   /* nunroll = total number of copies of the original loop body in
116810d565efSmrg      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1169*ec02198aSmrg   nunroll = param_max_unrolled_insns / loop->ninsns;
117010d565efSmrg   nunroll_by_av
1171*ec02198aSmrg     = param_max_average_unrolled_insns / loop->av_ninsns;
117210d565efSmrg   if (nunroll > nunroll_by_av)
117310d565efSmrg     nunroll = nunroll_by_av;
1174*ec02198aSmrg   if (nunroll > (unsigned) param_max_unroll_times)
1175*ec02198aSmrg     nunroll = param_max_unroll_times;
117610d565efSmrg 
117710d565efSmrg   if (targetm.loop_unroll_adjust)
117810d565efSmrg     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
117910d565efSmrg 
1180c7a68eb7Smrg   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
1181c7a68eb7Smrg     nunroll = loop->unroll;
1182c7a68eb7Smrg 
118310d565efSmrg   /* Skip big loops.  */
118410d565efSmrg   if (nunroll <= 1)
118510d565efSmrg     {
118610d565efSmrg       if (dump_file)
118710d565efSmrg 	fprintf (dump_file, ";; Not considering loop, is too big\n");
118810d565efSmrg       return;
118910d565efSmrg     }
119010d565efSmrg 
119110d565efSmrg   /* Check for simple loops.  */
119210d565efSmrg   desc = get_simple_loop_desc (loop);
119310d565efSmrg 
119410d565efSmrg   /* Check simpleness.  */
119510d565efSmrg   if (desc->simple_p && !desc->assumptions)
119610d565efSmrg     {
119710d565efSmrg       if (dump_file)
1198c7a68eb7Smrg 	fprintf (dump_file, ";; Loop is simple\n");
119910d565efSmrg       return;
120010d565efSmrg     }
120110d565efSmrg 
120210d565efSmrg   /* Do not unroll loops with branches inside -- it increases number
120310d565efSmrg      of mispredicts.
120410d565efSmrg      TODO: this heuristic needs tunning; call inside the loop body
120510d565efSmrg      is also relatively good reason to not unroll.  */
120610d565efSmrg   if (num_loop_branches (loop) > 1)
120710d565efSmrg     {
120810d565efSmrg       if (dump_file)
120910d565efSmrg 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
121010d565efSmrg       return;
121110d565efSmrg     }
121210d565efSmrg 
121310d565efSmrg   /* Check whether the loop rolls.  */
121410d565efSmrg   if ((get_estimated_loop_iterations (loop, &iterations)
121510d565efSmrg        || get_likely_max_loop_iterations (loop, &iterations))
121610d565efSmrg       && wi::ltu_p (iterations, 2 * nunroll))
121710d565efSmrg     {
121810d565efSmrg       if (dump_file)
121910d565efSmrg 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
122010d565efSmrg       return;
122110d565efSmrg     }
122210d565efSmrg 
122310d565efSmrg   /* Success.  Now force nunroll to be power of 2, as it seems that this
122410d565efSmrg      improves results (partially because of better alignments, partially
122510d565efSmrg      because of some dark magic).  */
122610d565efSmrg   for (i = 1; 2 * i <= nunroll; i *= 2)
122710d565efSmrg     continue;
122810d565efSmrg 
122910d565efSmrg   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
123010d565efSmrg   loop->lpt_decision.times = i - 1;
123110d565efSmrg }
123210d565efSmrg 
123310d565efSmrg /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
123410d565efSmrg 
123510d565efSmrg    while (cond)
123610d565efSmrg      body;
123710d565efSmrg 
123810d565efSmrg    ==>  (LOOP->LPT_DECISION.TIMES == 3)
123910d565efSmrg 
124010d565efSmrg    while (cond)
124110d565efSmrg      {
124210d565efSmrg        body;
124310d565efSmrg        if (!cond) break;
124410d565efSmrg        body;
124510d565efSmrg        if (!cond) break;
124610d565efSmrg        body;
124710d565efSmrg        if (!cond) break;
124810d565efSmrg        body;
124910d565efSmrg      }
125010d565efSmrg    */
125110d565efSmrg static void
unroll_loop_stupid(class loop * loop)1252*ec02198aSmrg unroll_loop_stupid (class loop *loop)
125310d565efSmrg {
125410d565efSmrg   unsigned nunroll = loop->lpt_decision.times;
1255*ec02198aSmrg   class niter_desc *desc = get_simple_loop_desc (loop);
125610d565efSmrg   struct opt_info *opt_info = NULL;
125710d565efSmrg   bool ok;
125810d565efSmrg 
125910d565efSmrg   if (flag_split_ivs_in_unroller
126010d565efSmrg       || flag_variable_expansion_in_unroller)
126110d565efSmrg     opt_info = analyze_insns_in_loop (loop);
126210d565efSmrg 
126310d565efSmrg   auto_sbitmap wont_exit (nunroll + 1);
126410d565efSmrg   bitmap_clear (wont_exit);
126510d565efSmrg   opt_info_start_duplication (opt_info);
126610d565efSmrg 
126710d565efSmrg   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
126810d565efSmrg 				      nunroll, wont_exit,
126910d565efSmrg 				      NULL, NULL,
127010d565efSmrg 				      DLTHE_FLAG_UPDATE_FREQ
127110d565efSmrg 				      | (opt_info
127210d565efSmrg 					 ? DLTHE_RECORD_COPY_NUMBER
127310d565efSmrg 					   : 0));
127410d565efSmrg   gcc_assert (ok);
127510d565efSmrg 
127610d565efSmrg   if (opt_info)
127710d565efSmrg     {
127810d565efSmrg       apply_opt_in_copies (opt_info, nunroll, true, true);
127910d565efSmrg       free_opt_info (opt_info);
128010d565efSmrg     }
128110d565efSmrg 
128210d565efSmrg   if (desc->simple_p)
128310d565efSmrg     {
128410d565efSmrg       /* We indeed may get here provided that there are nontrivial assumptions
128510d565efSmrg 	 for a loop to be really simple.  We could update the counts, but the
128610d565efSmrg 	 problem is that we are unable to decide which exit will be taken
128710d565efSmrg 	 (not really true in case the number of iterations is constant,
128810d565efSmrg 	 but no one will do anything with this information, so we do not
128910d565efSmrg 	 worry about it).  */
129010d565efSmrg       desc->simple_p = false;
129110d565efSmrg     }
129210d565efSmrg 
129310d565efSmrg   if (dump_file)
129410d565efSmrg     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
129510d565efSmrg 	     nunroll, num_loop_insns (loop));
129610d565efSmrg }
129710d565efSmrg 
129810d565efSmrg /* Returns true if REG is referenced in one nondebug insn in LOOP.
129910d565efSmrg    Set *DEBUG_USES to the number of debug insns that reference the
130010d565efSmrg    variable.  */
130110d565efSmrg 
130210d565efSmrg static bool
referenced_in_one_insn_in_loop_p(class loop * loop,rtx reg,int * debug_uses)1303*ec02198aSmrg referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
130410d565efSmrg 				  int *debug_uses)
130510d565efSmrg {
130610d565efSmrg   basic_block *body, bb;
130710d565efSmrg   unsigned i;
130810d565efSmrg   int count_ref = 0;
130910d565efSmrg   rtx_insn *insn;
131010d565efSmrg 
131110d565efSmrg   body = get_loop_body (loop);
131210d565efSmrg   for (i = 0; i < loop->num_nodes; i++)
131310d565efSmrg     {
131410d565efSmrg       bb = body[i];
131510d565efSmrg 
131610d565efSmrg       FOR_BB_INSNS (bb, insn)
131710d565efSmrg 	if (!rtx_referenced_p (reg, insn))
131810d565efSmrg 	  continue;
131910d565efSmrg 	else if (DEBUG_INSN_P (insn))
132010d565efSmrg 	  ++*debug_uses;
132110d565efSmrg 	else if (++count_ref > 1)
132210d565efSmrg 	  break;
132310d565efSmrg     }
132410d565efSmrg   free (body);
132510d565efSmrg   return (count_ref  == 1);
132610d565efSmrg }
132710d565efSmrg 
132810d565efSmrg /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
132910d565efSmrg 
133010d565efSmrg static void
reset_debug_uses_in_loop(class loop * loop,rtx reg,int debug_uses)1331*ec02198aSmrg reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
133210d565efSmrg {
133310d565efSmrg   basic_block *body, bb;
133410d565efSmrg   unsigned i;
133510d565efSmrg   rtx_insn *insn;
133610d565efSmrg 
133710d565efSmrg   body = get_loop_body (loop);
133810d565efSmrg   for (i = 0; debug_uses && i < loop->num_nodes; i++)
133910d565efSmrg     {
134010d565efSmrg       bb = body[i];
134110d565efSmrg 
134210d565efSmrg       FOR_BB_INSNS (bb, insn)
134310d565efSmrg 	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
134410d565efSmrg 	  continue;
134510d565efSmrg 	else
134610d565efSmrg 	  {
134710d565efSmrg 	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
134810d565efSmrg 			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
134910d565efSmrg 	    if (!--debug_uses)
135010d565efSmrg 	      break;
135110d565efSmrg 	  }
135210d565efSmrg     }
135310d565efSmrg   free (body);
135410d565efSmrg }
135510d565efSmrg 
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of accumulator.
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
   information and return a pointer to it.
*/

static struct var_to_expand *
analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  /* Only a single SET is a candidate for accumulator expansion.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  /* Only the associative-style accumulation operations are handled.  */
  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      /* Splitting a floating-point reduction reassociates it, which is
	 only permitted under -fassociative-math.  */
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  /* The accumulator must be a register or a subreg of one.  */
  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p  (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using; which includes the
	 initialization of the expansions with zero and the summation of
         the expansions at the end of the computation will yield wrong
	 results for (x = something - x) thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
149110d565efSmrg 
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  class rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  /* Only scalar-integer register destinations are candidates.  */
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  /* A zero-step IV gains nothing from splitting, and an IV whose mode
     differs from its extension mode is not handled here.  */
  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
156610d565efSmrg 
/* Determines which of insns in LOOP can be optimized.
   Return a OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (class loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
       	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  /* Variable expansion needs a unique, non-complex exit edge where the
     partial results can be combined; split it so we have a dedicated
     block for that code.  */
  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
       	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      /* Consider only blocks that execute on every iteration, i.e. those
	 dominating the latch.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
      {
        if (!INSN_P (insn))
          continue;

        if (opt_info->insns_to_split)
          ivts = analyze_iv_to_split_insn (insn);

        if (ivts)
          {
            slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	    gcc_assert (*slot1 == NULL);
            *slot1 = ivts;
	    /* Also thread the record onto the tail of the insertion-order
	       list.  */
	    *opt_info->iv_to_split_tail = ivts;
	    opt_info->iv_to_split_tail = &ivts->next;
            continue;
          }

        if (opt_info->insns_with_var_to_expand)
          ves = analyze_insn_to_expand_var (loop, insn);

        if (ves)
          {
            slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	    gcc_assert (*slot2 == NULL);
            *slot2 = ves;
	    *opt_info->var_to_expand_tail = ves;
	    opt_info->var_to_expand_tail = &ves->next;
          }
      }
    }

  edges.release ();
  free (body);
  return opt_info;
}
166310d565efSmrg 
166410d565efSmrg /* Called just before loop duplication.  Records start of duplicated area
166510d565efSmrg    to OPT_INFO.  */
166610d565efSmrg 
166710d565efSmrg static void
opt_info_start_duplication(struct opt_info * opt_info)166810d565efSmrg opt_info_start_duplication (struct opt_info *opt_info)
166910d565efSmrg {
167010d565efSmrg   if (opt_info)
167110d565efSmrg     opt_info->first_new_block = last_basic_block_for_fn (cfun);
167210d565efSmrg }
167310d565efSmrg 
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, initialization is done in the original loop body,
     which is copy number 0, so the delta is just the copy index.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the copy in which the initialization occurs has
     number 1, and the original loop (number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
169810d565efSmrg 
169910d565efSmrg /* Allocate basic variable for the induction variable chain.  */
170010d565efSmrg 
170110d565efSmrg static void
allocate_basic_variable(struct iv_to_split * ivts)170210d565efSmrg allocate_basic_variable (struct iv_to_split *ivts)
170310d565efSmrg {
170410d565efSmrg   rtx expr = SET_SRC (single_set (ivts->insn));
170510d565efSmrg 
170610d565efSmrg   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
170710d565efSmrg }
170810d565efSmrg 
170910d565efSmrg /* Insert initialization of basic variable of IVTS before INSN, taking
171010d565efSmrg    the initial value from INSN.  */
171110d565efSmrg 
171210d565efSmrg static void
insert_base_initialization(struct iv_to_split * ivts,rtx_insn * insn)171310d565efSmrg insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
171410d565efSmrg {
171510d565efSmrg   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
171610d565efSmrg   rtx_insn *seq;
171710d565efSmrg 
171810d565efSmrg   start_sequence ();
171910d565efSmrg   expr = force_operand (expr, ivts->base_var);
172010d565efSmrg   if (expr != ivts->base_var)
172110d565efSmrg     emit_move_insn (ivts->base_var, expr);
172210d565efSmrg   seq = get_insns ();
172310d565efSmrg   end_sequence ();
172410d565efSmrg 
172510d565efSmrg   emit_insn_before (seq, insn);
172610d565efSmrg }
172710d565efSmrg 
/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  /* The regenerated sequence fully replaces INSN, which is deleted.  */
  emit_insn_before (seq, insn);
  delete_insn (insn);
}
178910d565efSmrg 
179010d565efSmrg 
179110d565efSmrg /* Return one expansion of the accumulator recorded in struct VE.  */
179210d565efSmrg 
179310d565efSmrg static rtx
get_expansion(struct var_to_expand * ve)179410d565efSmrg get_expansion (struct var_to_expand *ve)
179510d565efSmrg {
179610d565efSmrg   rtx reg;
179710d565efSmrg 
179810d565efSmrg   if (ve->reuse_expansion == 0)
179910d565efSmrg     reg = ve->reg;
180010d565efSmrg   else
180110d565efSmrg     reg = ve->var_expansions[ve->reuse_expansion - 1];
180210d565efSmrg 
180310d565efSmrg   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
180410d565efSmrg     ve->reuse_expansion = 0;
180510d565efSmrg   else
180610d565efSmrg     ve->reuse_expansion++;
180710d565efSmrg 
180810d565efSmrg   return reg;
180910d565efSmrg }
181010d565efSmrg 
181110d565efSmrg 
181210d565efSmrg /* Given INSN replace the uses of the accumulator recorded in VE
181310d565efSmrg    with a new register.  */
181410d565efSmrg 
181510d565efSmrg static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx_insn * insn)181610d565efSmrg expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
181710d565efSmrg {
181810d565efSmrg   rtx new_reg, set;
181910d565efSmrg   bool really_new_expansion = false;
182010d565efSmrg 
182110d565efSmrg   set = single_set (insn);
182210d565efSmrg   gcc_assert (set);
182310d565efSmrg 
182410d565efSmrg   /* Generate a new register only if the expansion limit has not been
182510d565efSmrg      reached.  Else reuse an already existing expansion.  */
1826*ec02198aSmrg   if (param_max_variable_expansions > ve->expansion_count)
182710d565efSmrg     {
182810d565efSmrg       really_new_expansion = true;
182910d565efSmrg       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
183010d565efSmrg     }
183110d565efSmrg   else
183210d565efSmrg     new_reg = get_expansion (ve);
183310d565efSmrg 
183410d565efSmrg   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
183510d565efSmrg   if (apply_change_group ())
183610d565efSmrg     if (really_new_expansion)
183710d565efSmrg       {
183810d565efSmrg         ve->var_expansions.safe_push (new_reg);
183910d565efSmrg         ve->expansion_count++;
184010d565efSmrg       }
184110d565efSmrg }
184210d565efSmrg 
184310d565efSmrg /* Initialize the variable expansions in loop preheader.  PLACE is the
184410d565efSmrg    loop-preheader basic block where the initialization of the
184510d565efSmrg    expansions should take place.  The expansions are initialized with
184610d565efSmrg    (-0) when the operation is plus or minus to honor sign zero.  This
184710d565efSmrg    way we can prevent cases where the sign of the final result is
184810d565efSmrg    effected by the sign of the expansion.  Here is an example to
184910d565efSmrg    demonstrate this:
185010d565efSmrg 
185110d565efSmrg    for (i = 0 ; i < n; i++)
185210d565efSmrg      sum += something;
185310d565efSmrg 
185410d565efSmrg    ==>
185510d565efSmrg 
185610d565efSmrg    sum += something
185710d565efSmrg    ....
185810d565efSmrg    i = i+1;
185910d565efSmrg    sum1 += something
186010d565efSmrg    ....
186110d565efSmrg    i = i+1
186210d565efSmrg    sum2 += something;
186310d565efSmrg    ....
186410d565efSmrg 
186510d565efSmrg    When SUM is initialized with -zero and SOMETHING is also -zero; the
186610d565efSmrg    final result of sum should be -zero thus the expansions sum1 and sum2
186710d565efSmrg    should be initialized with -zero as well (otherwise we will get +zero
186810d565efSmrg    as the final result).  */
186910d565efSmrg 
187010d565efSmrg static void
insert_var_expansion_initialization(struct var_to_expand * ve,basic_block place)187110d565efSmrg insert_var_expansion_initialization (struct var_to_expand *ve,
187210d565efSmrg 				     basic_block place)
187310d565efSmrg {
187410d565efSmrg   rtx_insn *seq;
187510d565efSmrg   rtx var, zero_init;
187610d565efSmrg   unsigned i;
187710d565efSmrg   machine_mode mode = GET_MODE (ve->reg);
187810d565efSmrg   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
187910d565efSmrg 
188010d565efSmrg   if (ve->var_expansions.length () == 0)
188110d565efSmrg     return;
188210d565efSmrg 
188310d565efSmrg   start_sequence ();
188410d565efSmrg   switch (ve->op)
188510d565efSmrg     {
188610d565efSmrg     case FMA:
188710d565efSmrg       /* Note that we only accumulate FMA via the ADD operand.  */
188810d565efSmrg     case PLUS:
188910d565efSmrg     case MINUS:
189010d565efSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
189110d565efSmrg         {
189210d565efSmrg 	  if (honor_signed_zero_p)
189310d565efSmrg 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
189410d565efSmrg 	  else
189510d565efSmrg 	    zero_init = CONST0_RTX (mode);
189610d565efSmrg           emit_move_insn (var, zero_init);
189710d565efSmrg         }
189810d565efSmrg       break;
189910d565efSmrg 
190010d565efSmrg     case MULT:
190110d565efSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
190210d565efSmrg         {
190310d565efSmrg           zero_init = CONST1_RTX (GET_MODE (var));
190410d565efSmrg           emit_move_insn (var, zero_init);
190510d565efSmrg         }
190610d565efSmrg       break;
190710d565efSmrg 
190810d565efSmrg     default:
190910d565efSmrg       gcc_unreachable ();
191010d565efSmrg     }
191110d565efSmrg 
191210d565efSmrg   seq = get_insns ();
191310d565efSmrg   end_sequence ();
191410d565efSmrg 
191510d565efSmrg   emit_insn_after (seq, BB_END (place));
191610d565efSmrg }
191710d565efSmrg 
191810d565efSmrg /* Combine the variable expansions at the loop exit.  PLACE is the
191910d565efSmrg    loop exit basic block where the summation of the expansions should
192010d565efSmrg    take place.  */
192110d565efSmrg 
192210d565efSmrg static void
combine_var_copies_in_loop_exit(struct var_to_expand * ve,basic_block place)192310d565efSmrg combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
192410d565efSmrg {
192510d565efSmrg   rtx sum = ve->reg;
192610d565efSmrg   rtx expr, var;
192710d565efSmrg   rtx_insn *seq, *insn;
192810d565efSmrg   unsigned i;
192910d565efSmrg 
193010d565efSmrg   if (ve->var_expansions.length () == 0)
193110d565efSmrg     return;
193210d565efSmrg 
193310d565efSmrg   /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
193410d565efSmrg      it both here and as the destination of the assignment.  */
193510d565efSmrg   sum = copy_rtx (sum);
193610d565efSmrg   start_sequence ();
193710d565efSmrg   switch (ve->op)
193810d565efSmrg     {
193910d565efSmrg     case FMA:
194010d565efSmrg       /* Note that we only accumulate FMA via the ADD operand.  */
194110d565efSmrg     case PLUS:
194210d565efSmrg     case MINUS:
194310d565efSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
194410d565efSmrg 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
194510d565efSmrg       break;
194610d565efSmrg 
194710d565efSmrg     case MULT:
194810d565efSmrg       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
194910d565efSmrg 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
195010d565efSmrg       break;
195110d565efSmrg 
195210d565efSmrg     default:
195310d565efSmrg       gcc_unreachable ();
195410d565efSmrg     }
195510d565efSmrg 
195610d565efSmrg   expr = force_operand (sum, ve->reg);
195710d565efSmrg   if (expr != ve->reg)
195810d565efSmrg     emit_move_insn (ve->reg, expr);
195910d565efSmrg   seq = get_insns ();
196010d565efSmrg   end_sequence ();
196110d565efSmrg 
196210d565efSmrg   insn = BB_HEAD (place);
196310d565efSmrg   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
196410d565efSmrg     insn = NEXT_INSN (insn);
196510d565efSmrg 
196610d565efSmrg   emit_insn_after (seq, insn);
196710d565efSmrg }
196810d565efSmrg 
196910d565efSmrg /* Strip away REG_EQUAL notes for IVs we're splitting.
197010d565efSmrg 
197110d565efSmrg    Updating REG_EQUAL notes for IVs we split is tricky: We
197210d565efSmrg    cannot tell until after unrolling, DF-rescanning, and liveness
197310d565efSmrg    updating, whether an EQ_USE is reached by the split IV while
197410d565efSmrg    the IV reg is still live.  See PR55006.
197510d565efSmrg 
197610d565efSmrg    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
197710d565efSmrg    because RTL loop-iv requires us to defer rescanning insns and
197810d565efSmrg    any notes attached to them.  So resort to old techniques...  */
197910d565efSmrg 
198010d565efSmrg static void
maybe_strip_eq_note_for_split_iv(struct opt_info * opt_info,rtx_insn * insn)198110d565efSmrg maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
198210d565efSmrg {
198310d565efSmrg   struct iv_to_split *ivts;
198410d565efSmrg   rtx note = find_reg_equal_equiv_note (insn);
198510d565efSmrg   if (! note)
198610d565efSmrg     return;
198710d565efSmrg   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
198810d565efSmrg     if (reg_mentioned_p (ivts->orig_var, note))
198910d565efSmrg       {
199010d565efSmrg 	remove_note (insn, note);
199110d565efSmrg 	return;
199210d565efSmrg       }
199310d565efSmrg }
199410d565efSmrg 
199510d565efSmrg /* Apply loop optimizations in loop copies using the
199610d565efSmrg    data which gathered during the unrolling.  Structure
199710d565efSmrg    OPT_INFO record that data.
199810d565efSmrg 
199910d565efSmrg    UNROLLING is true if we unrolled (not peeled) the loop.
200010d565efSmrg    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
200110d565efSmrg    the loop (as it should happen in complete unrolling, but not in ordinary
200210d565efSmrg    peeling of the loop).  */
200310d565efSmrg 
200410d565efSmrg static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)200510d565efSmrg apply_opt_in_copies (struct opt_info *opt_info,
200610d565efSmrg                      unsigned n_copies, bool unrolling,
200710d565efSmrg                      bool rewrite_original_loop)
200810d565efSmrg {
200910d565efSmrg   unsigned i, delta;
201010d565efSmrg   basic_block bb, orig_bb;
201110d565efSmrg   rtx_insn *insn, *orig_insn, *next;
201210d565efSmrg   struct iv_to_split ivts_templ, *ivts;
201310d565efSmrg   struct var_to_expand ve_templ, *ves;
201410d565efSmrg 
201510d565efSmrg   /* Sanity check -- we need to put initialization in the original loop
201610d565efSmrg      body.  */
201710d565efSmrg   gcc_assert (!unrolling || rewrite_original_loop);
201810d565efSmrg 
201910d565efSmrg   /* Allocate the basic variables (i0).  */
202010d565efSmrg   if (opt_info->insns_to_split)
202110d565efSmrg     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
202210d565efSmrg       allocate_basic_variable (ivts);
202310d565efSmrg 
202410d565efSmrg   for (i = opt_info->first_new_block;
202510d565efSmrg        i < (unsigned) last_basic_block_for_fn (cfun);
202610d565efSmrg        i++)
202710d565efSmrg     {
202810d565efSmrg       bb = BASIC_BLOCK_FOR_FN (cfun, i);
202910d565efSmrg       orig_bb = get_bb_original (bb);
203010d565efSmrg 
203110d565efSmrg       /* bb->aux holds position in copy sequence initialized by
203210d565efSmrg 	 duplicate_loop_to_header_edge.  */
203310d565efSmrg       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
203410d565efSmrg 					unrolling);
203510d565efSmrg       bb->aux = 0;
203610d565efSmrg       orig_insn = BB_HEAD (orig_bb);
203710d565efSmrg       FOR_BB_INSNS_SAFE (bb, insn, next)
203810d565efSmrg         {
203910d565efSmrg 	  if (!INSN_P (insn)
2040c7a68eb7Smrg 	      || (DEBUG_BIND_INSN_P (insn)
2041c7a68eb7Smrg 		  && INSN_VAR_LOCATION_DECL (insn)
204210d565efSmrg 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
204310d565efSmrg             continue;
204410d565efSmrg 
204510d565efSmrg 	  while (!INSN_P (orig_insn)
2046c7a68eb7Smrg 		 || (DEBUG_BIND_INSN_P (orig_insn)
2047c7a68eb7Smrg 		     && INSN_VAR_LOCATION_DECL (orig_insn)
204810d565efSmrg 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
204910d565efSmrg 			 == LABEL_DECL)))
205010d565efSmrg             orig_insn = NEXT_INSN (orig_insn);
205110d565efSmrg 
205210d565efSmrg           ivts_templ.insn = orig_insn;
205310d565efSmrg           ve_templ.insn = orig_insn;
205410d565efSmrg 
205510d565efSmrg           /* Apply splitting iv optimization.  */
205610d565efSmrg           if (opt_info->insns_to_split)
205710d565efSmrg             {
205810d565efSmrg 	      maybe_strip_eq_note_for_split_iv (opt_info, insn);
205910d565efSmrg 
206010d565efSmrg               ivts = opt_info->insns_to_split->find (&ivts_templ);
206110d565efSmrg 
206210d565efSmrg               if (ivts)
206310d565efSmrg                 {
206410d565efSmrg 		  gcc_assert (GET_CODE (PATTERN (insn))
206510d565efSmrg 			      == GET_CODE (PATTERN (orig_insn)));
206610d565efSmrg 
206710d565efSmrg                   if (!delta)
206810d565efSmrg                     insert_base_initialization (ivts, insn);
206910d565efSmrg                   split_iv (ivts, insn, delta);
207010d565efSmrg                 }
207110d565efSmrg             }
207210d565efSmrg           /* Apply variable expansion optimization.  */
207310d565efSmrg           if (unrolling && opt_info->insns_with_var_to_expand)
207410d565efSmrg             {
207510d565efSmrg               ves = (struct var_to_expand *)
207610d565efSmrg 		opt_info->insns_with_var_to_expand->find (&ve_templ);
207710d565efSmrg               if (ves)
207810d565efSmrg                 {
207910d565efSmrg 		  gcc_assert (GET_CODE (PATTERN (insn))
208010d565efSmrg 			      == GET_CODE (PATTERN (orig_insn)));
208110d565efSmrg                   expand_var_during_unrolling (ves, insn);
208210d565efSmrg                 }
208310d565efSmrg             }
208410d565efSmrg           orig_insn = NEXT_INSN (orig_insn);
208510d565efSmrg         }
208610d565efSmrg     }
208710d565efSmrg 
208810d565efSmrg   if (!rewrite_original_loop)
208910d565efSmrg     return;
209010d565efSmrg 
209110d565efSmrg   /* Initialize the variable expansions in the loop preheader
209210d565efSmrg      and take care of combining them at the loop exit.  */
209310d565efSmrg   if (opt_info->insns_with_var_to_expand)
209410d565efSmrg     {
209510d565efSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
209610d565efSmrg 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
209710d565efSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
209810d565efSmrg 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
209910d565efSmrg     }
210010d565efSmrg 
210110d565efSmrg   /* Rewrite also the original loop body.  Find them as originals of the blocks
210210d565efSmrg      in the last copied iteration, i.e. those that have
210310d565efSmrg      get_bb_copy (get_bb_original (bb)) == bb.  */
210410d565efSmrg   for (i = opt_info->first_new_block;
210510d565efSmrg        i < (unsigned) last_basic_block_for_fn (cfun);
210610d565efSmrg        i++)
210710d565efSmrg     {
210810d565efSmrg       bb = BASIC_BLOCK_FOR_FN (cfun, i);
210910d565efSmrg       orig_bb = get_bb_original (bb);
211010d565efSmrg       if (get_bb_copy (orig_bb) != bb)
211110d565efSmrg 	continue;
211210d565efSmrg 
211310d565efSmrg       delta = determine_split_iv_delta (0, n_copies, unrolling);
211410d565efSmrg       for (orig_insn = BB_HEAD (orig_bb);
211510d565efSmrg            orig_insn != NEXT_INSN (BB_END (bb));
211610d565efSmrg            orig_insn = next)
211710d565efSmrg         {
211810d565efSmrg           next = NEXT_INSN (orig_insn);
211910d565efSmrg 
212010d565efSmrg           if (!INSN_P (orig_insn))
212110d565efSmrg  	    continue;
212210d565efSmrg 
212310d565efSmrg           ivts_templ.insn = orig_insn;
212410d565efSmrg           if (opt_info->insns_to_split)
212510d565efSmrg             {
212610d565efSmrg 	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
212710d565efSmrg 
212810d565efSmrg               ivts = (struct iv_to_split *)
212910d565efSmrg 		opt_info->insns_to_split->find (&ivts_templ);
213010d565efSmrg               if (ivts)
213110d565efSmrg                 {
213210d565efSmrg                   if (!delta)
213310d565efSmrg                     insert_base_initialization (ivts, orig_insn);
213410d565efSmrg                   split_iv (ivts, orig_insn, delta);
213510d565efSmrg                   continue;
213610d565efSmrg                 }
213710d565efSmrg             }
213810d565efSmrg 
213910d565efSmrg         }
214010d565efSmrg     }
214110d565efSmrg }
214210d565efSmrg 
214310d565efSmrg /* Release OPT_INFO.  */
214410d565efSmrg 
214510d565efSmrg static void
free_opt_info(struct opt_info * opt_info)214610d565efSmrg free_opt_info (struct opt_info *opt_info)
214710d565efSmrg {
214810d565efSmrg   delete opt_info->insns_to_split;
214910d565efSmrg   opt_info->insns_to_split = NULL;
215010d565efSmrg   if (opt_info->insns_with_var_to_expand)
215110d565efSmrg     {
215210d565efSmrg       struct var_to_expand *ves;
215310d565efSmrg 
215410d565efSmrg       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
215510d565efSmrg 	ves->var_expansions.release ();
215610d565efSmrg       delete opt_info->insns_with_var_to_expand;
215710d565efSmrg       opt_info->insns_with_var_to_expand = NULL;
215810d565efSmrg     }
215910d565efSmrg   free (opt_info);
216010d565efSmrg }
2161