xref: /dragonfly/contrib/gcc-8.0/gcc/loop-unroll.c (revision e215fc28)
138fd1498Szrj /* Loop unrolling.
238fd1498Szrj    Copyright (C) 2002-2018 Free Software Foundation, Inc.
338fd1498Szrj 
438fd1498Szrj This file is part of GCC.
538fd1498Szrj 
638fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
738fd1498Szrj the terms of the GNU General Public License as published by the Free
838fd1498Szrj Software Foundation; either version 3, or (at your option) any later
938fd1498Szrj version.
1038fd1498Szrj 
1138fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
1238fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
1338fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1438fd1498Szrj for more details.
1538fd1498Szrj 
1638fd1498Szrj You should have received a copy of the GNU General Public License
1738fd1498Szrj along with GCC; see the file COPYING3.  If not see
1838fd1498Szrj <http://www.gnu.org/licenses/>.  */
1938fd1498Szrj 
2038fd1498Szrj #include "config.h"
2138fd1498Szrj #include "system.h"
2238fd1498Szrj #include "coretypes.h"
2338fd1498Szrj #include "backend.h"
2438fd1498Szrj #include "target.h"
2538fd1498Szrj #include "rtl.h"
2638fd1498Szrj #include "tree.h"
2738fd1498Szrj #include "cfghooks.h"
2838fd1498Szrj #include "memmodel.h"
2938fd1498Szrj #include "optabs.h"
3038fd1498Szrj #include "emit-rtl.h"
3138fd1498Szrj #include "recog.h"
3238fd1498Szrj #include "profile.h"
3338fd1498Szrj #include "cfgrtl.h"
3438fd1498Szrj #include "cfgloop.h"
3538fd1498Szrj #include "params.h"
3638fd1498Szrj #include "dojump.h"
3738fd1498Szrj #include "expr.h"
3838fd1498Szrj #include "dumpfile.h"
3938fd1498Szrj 
4038fd1498Szrj /* This pass performs loop unrolling.  We only perform this
4138fd1498Szrj    optimization on innermost loops (with single exception) because
4238fd1498Szrj    the impact on performance is greatest here, and we want to avoid
4338fd1498Szrj    unnecessary code size growth.  The gain is caused by greater sequentiality
4438fd1498Szrj    of code, better code to optimize for further passes and in some cases
4538fd1498Szrj    by fewer testings of exit conditions.  The main problem is code growth,
4638fd1498Szrj    that impacts performance negatively due to effect of caches.
4738fd1498Szrj 
4838fd1498Szrj    What we do:
4938fd1498Szrj 
5038fd1498Szrj    -- unrolling of loops that roll constant times; this is almost always
5138fd1498Szrj       win, as we get rid of exit condition tests.
5238fd1498Szrj    -- unrolling of loops that roll number of times that we can compute
5338fd1498Szrj       in runtime; we also get rid of exit condition tests here, but there
5438fd1498Szrj       is the extra expense for calculating the number of iterations
5538fd1498Szrj    -- simple unrolling of remaining loops; this is performed only if we
5638fd1498Szrj       are asked to, as the gain is questionable in this case and often
5738fd1498Szrj       it may even slow down the code
5838fd1498Szrj    For more detailed descriptions of each of those, see comments at
5938fd1498Szrj    appropriate function below.
6038fd1498Szrj 
6138fd1498Szrj    There is a lot of parameters (defined and described in params.def) that
6238fd1498Szrj    control how much we unroll.
6338fd1498Szrj 
6438fd1498Szrj    ??? A great problem is that we don't have a good way how to determine
6538fd1498Szrj    how many times we should unroll the loop; the experiments I have made
6638fd1498Szrj    showed that this choice may affect performance in order of several %.
6738fd1498Szrj    */
6838fd1498Szrj 
6938fd1498Szrj /* Information about induction variables to split.  */
7038fd1498Szrj 
7138fd1498Szrj struct iv_to_split
7238fd1498Szrj {
7338fd1498Szrj   rtx_insn *insn;	/* The insn in that the induction variable occurs.  */
7438fd1498Szrj   rtx orig_var;		/* The variable (register) for the IV before split.  */
7538fd1498Szrj   rtx base_var;		/* The variable on that the values in the further
7638fd1498Szrj 			   iterations are based.  */
7738fd1498Szrj   rtx step;		/* Step of the induction variable.  */
7838fd1498Szrj   struct iv_to_split *next; /* Next entry in walking order.  */
7938fd1498Szrj };
8038fd1498Szrj 
8138fd1498Szrj /* Information about accumulators to expand.  */
8238fd1498Szrj 
8338fd1498Szrj struct var_to_expand
8438fd1498Szrj {
8538fd1498Szrj   rtx_insn *insn;	           /* The insn in that the variable expansion occurs.  */
8638fd1498Szrj   rtx reg;                         /* The accumulator which is expanded.  */
8738fd1498Szrj   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
8838fd1498Szrj   struct var_to_expand *next;	   /* Next entry in walking order.  */
8938fd1498Szrj   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
9038fd1498Szrj                                       or multiplication.  */
9138fd1498Szrj   int expansion_count;             /* Count the number of expansions generated so far.  */
9238fd1498Szrj   int reuse_expansion;             /* The expansion we intend to reuse to expand
9338fd1498Szrj                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
9438fd1498Szrj                                       the original accumulator.  Else use
9538fd1498Szrj                                       var_expansions[REUSE_EXPANSION - 1].  */
9638fd1498Szrj };
9738fd1498Szrj 
9838fd1498Szrj /* Hashtable helper for iv_to_split.  */
9938fd1498Szrj 
10038fd1498Szrj struct iv_split_hasher : free_ptr_hash <iv_to_split>
10138fd1498Szrj {
10238fd1498Szrj   static inline hashval_t hash (const iv_to_split *);
10338fd1498Szrj   static inline bool equal (const iv_to_split *, const iv_to_split *);
10438fd1498Szrj };
10538fd1498Szrj 
10638fd1498Szrj 
10738fd1498Szrj /* A hash function for information about insns to split.  */
10838fd1498Szrj 
10938fd1498Szrj inline hashval_t
hash(const iv_to_split * ivts)11038fd1498Szrj iv_split_hasher::hash (const iv_to_split *ivts)
11138fd1498Szrj {
11238fd1498Szrj   return (hashval_t) INSN_UID (ivts->insn);
11338fd1498Szrj }
11438fd1498Szrj 
11538fd1498Szrj /* An equality functions for information about insns to split.  */
11638fd1498Szrj 
11738fd1498Szrj inline bool
equal(const iv_to_split * i1,const iv_to_split * i2)11838fd1498Szrj iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
11938fd1498Szrj {
12038fd1498Szrj   return i1->insn == i2->insn;
12138fd1498Szrj }
12238fd1498Szrj 
12338fd1498Szrj /* Hashtable helper for iv_to_split.  */
12438fd1498Szrj 
12538fd1498Szrj struct var_expand_hasher : free_ptr_hash <var_to_expand>
12638fd1498Szrj {
12738fd1498Szrj   static inline hashval_t hash (const var_to_expand *);
12838fd1498Szrj   static inline bool equal (const var_to_expand *, const var_to_expand *);
12938fd1498Szrj };
13038fd1498Szrj 
13138fd1498Szrj /* Return a hash for VES.  */
13238fd1498Szrj 
13338fd1498Szrj inline hashval_t
hash(const var_to_expand * ves)13438fd1498Szrj var_expand_hasher::hash (const var_to_expand *ves)
13538fd1498Szrj {
13638fd1498Szrj   return (hashval_t) INSN_UID (ves->insn);
13738fd1498Szrj }
13838fd1498Szrj 
13938fd1498Szrj /* Return true if I1 and I2 refer to the same instruction.  */
14038fd1498Szrj 
14138fd1498Szrj inline bool
equal(const var_to_expand * i1,const var_to_expand * i2)14238fd1498Szrj var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
14338fd1498Szrj {
14438fd1498Szrj   return i1->insn == i2->insn;
14538fd1498Szrj }
14638fd1498Szrj 
14738fd1498Szrj /* Information about optimization applied in
14838fd1498Szrj    the unrolled loop.  */
14938fd1498Szrj 
15038fd1498Szrj struct opt_info
15138fd1498Szrj {
15238fd1498Szrj   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
15338fd1498Szrj 						  split.  */
15438fd1498Szrj   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
15538fd1498Szrj   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
15638fd1498Szrj   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
15738fd1498Szrj 					insns with accumulators to expand.  */
15838fd1498Szrj   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
15938fd1498Szrj   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
16038fd1498Szrj   unsigned first_new_block;        /* The first basic block that was
16138fd1498Szrj                                       duplicated.  */
16238fd1498Szrj   basic_block loop_exit;           /* The loop exit basic block.  */
16338fd1498Szrj   basic_block loop_preheader;      /* The loop preheader basic block.  */
16438fd1498Szrj };
16538fd1498Szrj 
16638fd1498Szrj static void decide_unroll_stupid (struct loop *, int);
16738fd1498Szrj static void decide_unroll_constant_iterations (struct loop *, int);
16838fd1498Szrj static void decide_unroll_runtime_iterations (struct loop *, int);
16938fd1498Szrj static void unroll_loop_stupid (struct loop *);
17038fd1498Szrj static void decide_unrolling (int);
17138fd1498Szrj static void unroll_loop_constant_iterations (struct loop *);
17238fd1498Szrj static void unroll_loop_runtime_iterations (struct loop *);
17338fd1498Szrj static struct opt_info *analyze_insns_in_loop (struct loop *);
17438fd1498Szrj static void opt_info_start_duplication (struct opt_info *);
17538fd1498Szrj static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
17638fd1498Szrj static void free_opt_info (struct opt_info *);
17738fd1498Szrj static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
17838fd1498Szrj static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
17938fd1498Szrj static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
18038fd1498Szrj static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
18138fd1498Szrj static void insert_var_expansion_initialization (struct var_to_expand *,
18238fd1498Szrj 						 basic_block);
18338fd1498Szrj static void combine_var_copies_in_loop_exit (struct var_to_expand *,
18438fd1498Szrj 					     basic_block);
18538fd1498Szrj static rtx get_expansion (struct var_to_expand *);
18638fd1498Szrj 
18738fd1498Szrj /* Emit a message summarizing the unroll that will be
18838fd1498Szrj    performed for LOOP, along with the loop's location LOCUS, if
18938fd1498Szrj    appropriate given the dump or -fopt-info settings.  */
19038fd1498Szrj 
19138fd1498Szrj static void
report_unroll(struct loop * loop,location_t locus)19238fd1498Szrj report_unroll (struct loop *loop, location_t locus)
19338fd1498Szrj {
19438fd1498Szrj   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
19538fd1498Szrj 
19638fd1498Szrj   if (loop->lpt_decision.decision == LPT_NONE)
19738fd1498Szrj     return;
19838fd1498Szrj 
19938fd1498Szrj   if (!dump_enabled_p ())
20038fd1498Szrj     return;
20138fd1498Szrj 
20238fd1498Szrj   dump_printf_loc (report_flags, locus,
20338fd1498Szrj                    "loop unrolled %d times",
20438fd1498Szrj                    loop->lpt_decision.times);
20538fd1498Szrj   if (profile_info && loop->header->count.initialized_p ())
20638fd1498Szrj     dump_printf (report_flags,
20738fd1498Szrj                  " (header execution count %d)",
20838fd1498Szrj                  (int)loop->header->count.to_gcov_type ());
20938fd1498Szrj 
21038fd1498Szrj   dump_printf (report_flags, "\n");
21138fd1498Szrj }
21238fd1498Szrj 
21338fd1498Szrj /* Decide whether unroll loops and how much.  */
21438fd1498Szrj static void
decide_unrolling(int flags)21538fd1498Szrj decide_unrolling (int flags)
21638fd1498Szrj {
21738fd1498Szrj   struct loop *loop;
21838fd1498Szrj 
21938fd1498Szrj   /* Scan the loops, inner ones first.  */
22038fd1498Szrj   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
22138fd1498Szrj     {
22238fd1498Szrj       loop->lpt_decision.decision = LPT_NONE;
22338fd1498Szrj       location_t locus = get_loop_location (loop);
22438fd1498Szrj 
22538fd1498Szrj       if (dump_enabled_p ())
22638fd1498Szrj 	dump_printf_loc (MSG_NOTE, locus,
22738fd1498Szrj 			 "considering unrolling loop %d at BB %d\n",
22838fd1498Szrj 			 loop->num, loop->header->index);
22938fd1498Szrj 
23038fd1498Szrj       if (loop->unroll == 1)
23138fd1498Szrj 	{
23238fd1498Szrj 	  if (dump_file)
23338fd1498Szrj 	    fprintf (dump_file,
23438fd1498Szrj 		     ";; Not unrolling loop, user didn't want it unrolled\n");
23538fd1498Szrj 	  continue;
23638fd1498Szrj 	}
23738fd1498Szrj 
23838fd1498Szrj       /* Do not peel cold areas.  */
23938fd1498Szrj       if (optimize_loop_for_size_p (loop))
24038fd1498Szrj 	{
24138fd1498Szrj 	  if (dump_file)
24238fd1498Szrj 	    fprintf (dump_file, ";; Not considering loop, cold area\n");
24338fd1498Szrj 	  continue;
24438fd1498Szrj 	}
24538fd1498Szrj 
24638fd1498Szrj       /* Can the loop be manipulated?  */
24738fd1498Szrj       if (!can_duplicate_loop_p (loop))
24838fd1498Szrj 	{
24938fd1498Szrj 	  if (dump_file)
25038fd1498Szrj 	    fprintf (dump_file,
25138fd1498Szrj 		     ";; Not considering loop, cannot duplicate\n");
25238fd1498Szrj 	  continue;
25338fd1498Szrj 	}
25438fd1498Szrj 
25538fd1498Szrj       /* Skip non-innermost loops.  */
25638fd1498Szrj       if (loop->inner)
25738fd1498Szrj 	{
25838fd1498Szrj 	  if (dump_file)
25938fd1498Szrj 	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
26038fd1498Szrj 	  continue;
26138fd1498Szrj 	}
26238fd1498Szrj 
26338fd1498Szrj       loop->ninsns = num_loop_insns (loop);
26438fd1498Szrj       loop->av_ninsns = average_num_loop_insns (loop);
26538fd1498Szrj 
26638fd1498Szrj       /* Try transformations one by one in decreasing order of priority.  */
26738fd1498Szrj       decide_unroll_constant_iterations (loop, flags);
26838fd1498Szrj       if (loop->lpt_decision.decision == LPT_NONE)
26938fd1498Szrj 	decide_unroll_runtime_iterations (loop, flags);
27038fd1498Szrj       if (loop->lpt_decision.decision == LPT_NONE)
27138fd1498Szrj 	decide_unroll_stupid (loop, flags);
27238fd1498Szrj 
27338fd1498Szrj       report_unroll (loop, locus);
27438fd1498Szrj     }
27538fd1498Szrj }
27638fd1498Szrj 
27738fd1498Szrj /* Unroll LOOPS.  */
27838fd1498Szrj void
unroll_loops(int flags)27938fd1498Szrj unroll_loops (int flags)
28038fd1498Szrj {
28138fd1498Szrj   struct loop *loop;
28238fd1498Szrj   bool changed = false;
28338fd1498Szrj 
28438fd1498Szrj   /* Now decide rest of unrolling.  */
28538fd1498Szrj   decide_unrolling (flags);
28638fd1498Szrj 
28738fd1498Szrj   /* Scan the loops, inner ones first.  */
28838fd1498Szrj   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
28938fd1498Szrj     {
29038fd1498Szrj       /* And perform the appropriate transformations.  */
29138fd1498Szrj       switch (loop->lpt_decision.decision)
29238fd1498Szrj 	{
29338fd1498Szrj 	case LPT_UNROLL_CONSTANT:
29438fd1498Szrj 	  unroll_loop_constant_iterations (loop);
29538fd1498Szrj 	  changed = true;
29638fd1498Szrj 	  break;
29738fd1498Szrj 	case LPT_UNROLL_RUNTIME:
29838fd1498Szrj 	  unroll_loop_runtime_iterations (loop);
29938fd1498Szrj 	  changed = true;
30038fd1498Szrj 	  break;
30138fd1498Szrj 	case LPT_UNROLL_STUPID:
30238fd1498Szrj 	  unroll_loop_stupid (loop);
30338fd1498Szrj 	  changed = true;
30438fd1498Szrj 	  break;
30538fd1498Szrj 	case LPT_NONE:
30638fd1498Szrj 	  break;
30738fd1498Szrj 	default:
30838fd1498Szrj 	  gcc_unreachable ();
30938fd1498Szrj 	}
31038fd1498Szrj     }
31138fd1498Szrj 
31238fd1498Szrj     if (changed)
31338fd1498Szrj       {
31438fd1498Szrj 	calculate_dominance_info (CDI_DOMINATORS);
31538fd1498Szrj 	fix_loop_structure (NULL);
31638fd1498Szrj       }
31738fd1498Szrj 
31838fd1498Szrj   iv_analysis_done ();
31938fd1498Szrj }
32038fd1498Szrj 
32138fd1498Szrj /* Check whether exit of the LOOP is at the end of loop body.  */
32238fd1498Szrj 
32338fd1498Szrj static bool
loop_exit_at_end_p(struct loop * loop)32438fd1498Szrj loop_exit_at_end_p (struct loop *loop)
32538fd1498Szrj {
32638fd1498Szrj   struct niter_desc *desc = get_simple_loop_desc (loop);
32738fd1498Szrj   rtx_insn *insn;
32838fd1498Szrj 
32938fd1498Szrj   /* We should never have conditional in latch block.  */
33038fd1498Szrj   gcc_assert (desc->in_edge->dest != loop->header);
33138fd1498Szrj 
33238fd1498Szrj   if (desc->in_edge->dest != loop->latch)
33338fd1498Szrj     return false;
33438fd1498Szrj 
33538fd1498Szrj   /* Check that the latch is empty.  */
33638fd1498Szrj   FOR_BB_INSNS (loop->latch, insn)
33738fd1498Szrj     {
33838fd1498Szrj       if (INSN_P (insn) && active_insn_p (insn))
33938fd1498Szrj 	return false;
34038fd1498Szrj     }
34138fd1498Szrj 
34238fd1498Szrj   return true;
34338fd1498Szrj }
34438fd1498Szrj 
34538fd1498Szrj /* Decide whether to unroll LOOP iterating constant number of times
34638fd1498Szrj    and how much.  */
34738fd1498Szrj 
34838fd1498Szrj static void
decide_unroll_constant_iterations(struct loop * loop,int flags)34938fd1498Szrj decide_unroll_constant_iterations (struct loop *loop, int flags)
35038fd1498Szrj {
35138fd1498Szrj   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
35238fd1498Szrj   struct niter_desc *desc;
35338fd1498Szrj   widest_int iterations;
35438fd1498Szrj 
35538fd1498Szrj   /* If we were not asked to unroll this loop, just return back silently.  */
35638fd1498Szrj   if (!(flags & UAP_UNROLL) && !loop->unroll)
35738fd1498Szrj     return;
35838fd1498Szrj 
35938fd1498Szrj   if (dump_enabled_p ())
36038fd1498Szrj     dump_printf (MSG_NOTE,
36138fd1498Szrj 		 "considering unrolling loop with constant "
36238fd1498Szrj 		 "number of iterations\n");
36338fd1498Szrj 
36438fd1498Szrj   /* nunroll = total number of copies of the original loop body in
36538fd1498Szrj      unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
36638fd1498Szrj   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
36738fd1498Szrj   nunroll_by_av
36838fd1498Szrj     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
36938fd1498Szrj   if (nunroll > nunroll_by_av)
37038fd1498Szrj     nunroll = nunroll_by_av;
37138fd1498Szrj   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
37238fd1498Szrj     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
37338fd1498Szrj 
37438fd1498Szrj   if (targetm.loop_unroll_adjust)
37538fd1498Szrj     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
37638fd1498Szrj 
37738fd1498Szrj   /* Skip big loops.  */
37838fd1498Szrj   if (nunroll <= 1)
37938fd1498Szrj     {
38038fd1498Szrj       if (dump_file)
38138fd1498Szrj 	fprintf (dump_file, ";; Not considering loop, is too big\n");
38238fd1498Szrj       return;
38338fd1498Szrj     }
38438fd1498Szrj 
38538fd1498Szrj   /* Check for simple loops.  */
38638fd1498Szrj   desc = get_simple_loop_desc (loop);
38738fd1498Szrj 
38838fd1498Szrj   /* Check number of iterations.  */
38938fd1498Szrj   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
39038fd1498Szrj     {
39138fd1498Szrj       if (dump_file)
39238fd1498Szrj 	fprintf (dump_file,
39338fd1498Szrj 		 ";; Unable to prove that the loop iterates constant times\n");
39438fd1498Szrj       return;
39538fd1498Szrj     }
39638fd1498Szrj 
39738fd1498Szrj   /* Check for an explicit unrolling factor.  */
39838fd1498Szrj   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
39938fd1498Szrj     {
40038fd1498Szrj       /* However we cannot unroll completely at the RTL level a loop with
40138fd1498Szrj 	 constant number of iterations; it should have been peeled instead.  */
402*e215fc28Szrj       if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
40338fd1498Szrj 	{
40438fd1498Szrj 	  if (dump_file)
40538fd1498Szrj 	    fprintf (dump_file, ";; Loop should have been peeled\n");
40638fd1498Szrj 	}
40738fd1498Szrj       else
40838fd1498Szrj 	{
40938fd1498Szrj 	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
41038fd1498Szrj 	  loop->lpt_decision.times = loop->unroll - 1;
41138fd1498Szrj 	}
41238fd1498Szrj       return;
41338fd1498Szrj     }
41438fd1498Szrj 
41538fd1498Szrj   /* Check whether the loop rolls enough to consider.
41638fd1498Szrj      Consult also loop bounds and profile; in the case the loop has more
41738fd1498Szrj      than one exit it may well loop less than determined maximal number
41838fd1498Szrj      of iterations.  */
41938fd1498Szrj   if (desc->niter < 2 * nunroll
42038fd1498Szrj       || ((get_estimated_loop_iterations (loop, &iterations)
42138fd1498Szrj 	   || get_likely_max_loop_iterations (loop, &iterations))
42238fd1498Szrj 	  && wi::ltu_p (iterations, 2 * nunroll)))
42338fd1498Szrj     {
42438fd1498Szrj       if (dump_file)
42538fd1498Szrj 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
42638fd1498Szrj       return;
42738fd1498Szrj     }
42838fd1498Szrj 
42938fd1498Szrj   /* Success; now compute number of iterations to unroll.  We alter
43038fd1498Szrj      nunroll so that as few as possible copies of loop body are
43138fd1498Szrj      necessary, while still not decreasing the number of unrollings
43238fd1498Szrj      too much (at most by 1).  */
43338fd1498Szrj   best_copies = 2 * nunroll + 10;
43438fd1498Szrj 
43538fd1498Szrj   i = 2 * nunroll + 2;
43638fd1498Szrj   if (i > desc->niter - 2)
43738fd1498Szrj     i = desc->niter - 2;
43838fd1498Szrj 
43938fd1498Szrj   for (; i >= nunroll - 1; i--)
44038fd1498Szrj     {
44138fd1498Szrj       unsigned exit_mod = desc->niter % (i + 1);
44238fd1498Szrj 
44338fd1498Szrj       if (!loop_exit_at_end_p (loop))
44438fd1498Szrj 	n_copies = exit_mod + i + 1;
44538fd1498Szrj       else if (exit_mod != (unsigned) i
44638fd1498Szrj 	       || desc->noloop_assumptions != NULL_RTX)
44738fd1498Szrj 	n_copies = exit_mod + i + 2;
44838fd1498Szrj       else
44938fd1498Szrj 	n_copies = i + 1;
45038fd1498Szrj 
45138fd1498Szrj       if (n_copies < best_copies)
45238fd1498Szrj 	{
45338fd1498Szrj 	  best_copies = n_copies;
45438fd1498Szrj 	  best_unroll = i;
45538fd1498Szrj 	}
45638fd1498Szrj     }
45738fd1498Szrj 
45838fd1498Szrj   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
45938fd1498Szrj   loop->lpt_decision.times = best_unroll;
46038fd1498Szrj }
46138fd1498Szrj 
46238fd1498Szrj /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
46338fd1498Szrj    The transformation does this:
46438fd1498Szrj 
46538fd1498Szrj    for (i = 0; i < 102; i++)
46638fd1498Szrj      body;
46738fd1498Szrj 
46838fd1498Szrj    ==>  (LOOP->LPT_DECISION.TIMES == 3)
46938fd1498Szrj 
47038fd1498Szrj    i = 0;
47138fd1498Szrj    body; i++;
47238fd1498Szrj    body; i++;
47338fd1498Szrj    while (i < 102)
47438fd1498Szrj      {
47538fd1498Szrj        body; i++;
47638fd1498Szrj        body; i++;
47738fd1498Szrj        body; i++;
47838fd1498Szrj        body; i++;
47938fd1498Szrj      }
48038fd1498Szrj   */
48138fd1498Szrj static void
unroll_loop_constant_iterations(struct loop * loop)48238fd1498Szrj unroll_loop_constant_iterations (struct loop *loop)
48338fd1498Szrj {
48438fd1498Szrj   unsigned HOST_WIDE_INT niter;
48538fd1498Szrj   unsigned exit_mod;
48638fd1498Szrj   unsigned i;
48738fd1498Szrj   edge e;
48838fd1498Szrj   unsigned max_unroll = loop->lpt_decision.times;
48938fd1498Szrj   struct niter_desc *desc = get_simple_loop_desc (loop);
49038fd1498Szrj   bool exit_at_end = loop_exit_at_end_p (loop);
49138fd1498Szrj   struct opt_info *opt_info = NULL;
49238fd1498Szrj   bool ok;
49338fd1498Szrj 
49438fd1498Szrj   niter = desc->niter;
49538fd1498Szrj 
49638fd1498Szrj   /* Should not get here (such loop should be peeled instead).  */
49738fd1498Szrj   gcc_assert (niter > max_unroll + 1);
49838fd1498Szrj 
49938fd1498Szrj   exit_mod = niter % (max_unroll + 1);
50038fd1498Szrj 
50138fd1498Szrj   auto_sbitmap wont_exit (max_unroll + 2);
50238fd1498Szrj   bitmap_ones (wont_exit);
50338fd1498Szrj 
50438fd1498Szrj   auto_vec<edge> remove_edges;
50538fd1498Szrj   if (flag_split_ivs_in_unroller
50638fd1498Szrj       || flag_variable_expansion_in_unroller)
50738fd1498Szrj     opt_info = analyze_insns_in_loop (loop);
50838fd1498Szrj 
50938fd1498Szrj   if (!exit_at_end)
51038fd1498Szrj     {
51138fd1498Szrj       /* The exit is not at the end of the loop; leave exit test
51238fd1498Szrj 	 in the first copy, so that the loops that start with test
51338fd1498Szrj 	 of exit condition have continuous body after unrolling.  */
51438fd1498Szrj 
51538fd1498Szrj       if (dump_file)
51638fd1498Szrj 	fprintf (dump_file, ";; Condition at beginning of loop.\n");
51738fd1498Szrj 
51838fd1498Szrj       /* Peel exit_mod iterations.  */
51938fd1498Szrj       bitmap_clear_bit (wont_exit, 0);
52038fd1498Szrj       if (desc->noloop_assumptions)
52138fd1498Szrj 	bitmap_clear_bit (wont_exit, 1);
52238fd1498Szrj 
52338fd1498Szrj       if (exit_mod)
52438fd1498Szrj 	{
52538fd1498Szrj 	  opt_info_start_duplication (opt_info);
52638fd1498Szrj           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
52738fd1498Szrj 					      exit_mod,
52838fd1498Szrj 					      wont_exit, desc->out_edge,
52938fd1498Szrj 					      &remove_edges,
53038fd1498Szrj 					      DLTHE_FLAG_UPDATE_FREQ
53138fd1498Szrj 					      | (opt_info && exit_mod > 1
53238fd1498Szrj 						 ? DLTHE_RECORD_COPY_NUMBER
53338fd1498Szrj 						   : 0));
53438fd1498Szrj 	  gcc_assert (ok);
53538fd1498Szrj 
53638fd1498Szrj           if (opt_info && exit_mod > 1)
53738fd1498Szrj  	    apply_opt_in_copies (opt_info, exit_mod, false, false);
53838fd1498Szrj 
53938fd1498Szrj 	  desc->noloop_assumptions = NULL_RTX;
54038fd1498Szrj 	  desc->niter -= exit_mod;
54138fd1498Szrj 	  loop->nb_iterations_upper_bound -= exit_mod;
54238fd1498Szrj 	  if (loop->any_estimate
54338fd1498Szrj 	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
54438fd1498Szrj 	    loop->nb_iterations_estimate -= exit_mod;
54538fd1498Szrj 	  else
54638fd1498Szrj 	    loop->any_estimate = false;
54738fd1498Szrj 	  if (loop->any_likely_upper_bound
54838fd1498Szrj 	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
54938fd1498Szrj 	    loop->nb_iterations_likely_upper_bound -= exit_mod;
55038fd1498Szrj 	  else
55138fd1498Szrj 	    loop->any_likely_upper_bound = false;
55238fd1498Szrj 	}
55338fd1498Szrj 
55438fd1498Szrj       bitmap_set_bit (wont_exit, 1);
55538fd1498Szrj     }
55638fd1498Szrj   else
55738fd1498Szrj     {
55838fd1498Szrj       /* Leave exit test in last copy, for the same reason as above if
55938fd1498Szrj 	 the loop tests the condition at the end of loop body.  */
56038fd1498Szrj 
56138fd1498Szrj       if (dump_file)
56238fd1498Szrj 	fprintf (dump_file, ";; Condition at end of loop.\n");
56338fd1498Szrj 
56438fd1498Szrj       /* We know that niter >= max_unroll + 2; so we do not need to care of
56538fd1498Szrj 	 case when we would exit before reaching the loop.  So just peel
56638fd1498Szrj 	 exit_mod + 1 iterations.  */
56738fd1498Szrj       if (exit_mod != max_unroll
56838fd1498Szrj 	  || desc->noloop_assumptions)
56938fd1498Szrj 	{
57038fd1498Szrj 	  bitmap_clear_bit (wont_exit, 0);
57138fd1498Szrj 	  if (desc->noloop_assumptions)
57238fd1498Szrj 	    bitmap_clear_bit (wont_exit, 1);
57338fd1498Szrj 
57438fd1498Szrj           opt_info_start_duplication (opt_info);
57538fd1498Szrj 	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
57638fd1498Szrj 					      exit_mod + 1,
57738fd1498Szrj 					      wont_exit, desc->out_edge,
57838fd1498Szrj 					      &remove_edges,
57938fd1498Szrj 					      DLTHE_FLAG_UPDATE_FREQ
58038fd1498Szrj 					      | (opt_info && exit_mod > 0
58138fd1498Szrj 						 ? DLTHE_RECORD_COPY_NUMBER
58238fd1498Szrj 						   : 0));
58338fd1498Szrj 	  gcc_assert (ok);
58438fd1498Szrj 
58538fd1498Szrj           if (opt_info && exit_mod > 0)
58638fd1498Szrj   	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
58738fd1498Szrj 
58838fd1498Szrj 	  desc->niter -= exit_mod + 1;
58938fd1498Szrj 	  loop->nb_iterations_upper_bound -= exit_mod + 1;
59038fd1498Szrj 	  if (loop->any_estimate
59138fd1498Szrj 	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
59238fd1498Szrj 	    loop->nb_iterations_estimate -= exit_mod + 1;
59338fd1498Szrj 	  else
59438fd1498Szrj 	    loop->any_estimate = false;
59538fd1498Szrj 	  if (loop->any_likely_upper_bound
59638fd1498Szrj 	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
59738fd1498Szrj 	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
59838fd1498Szrj 	  else
59938fd1498Szrj 	    loop->any_likely_upper_bound = false;
60038fd1498Szrj 	  desc->noloop_assumptions = NULL_RTX;
60138fd1498Szrj 
60238fd1498Szrj 	  bitmap_set_bit (wont_exit, 0);
60338fd1498Szrj 	  bitmap_set_bit (wont_exit, 1);
60438fd1498Szrj 	}
60538fd1498Szrj 
60638fd1498Szrj       bitmap_clear_bit (wont_exit, max_unroll);
60738fd1498Szrj     }
60838fd1498Szrj 
60938fd1498Szrj   /* Now unroll the loop.  */
61038fd1498Szrj 
61138fd1498Szrj   opt_info_start_duplication (opt_info);
61238fd1498Szrj   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
61338fd1498Szrj 				      max_unroll,
61438fd1498Szrj 				      wont_exit, desc->out_edge,
61538fd1498Szrj 				      &remove_edges,
61638fd1498Szrj 				      DLTHE_FLAG_UPDATE_FREQ
61738fd1498Szrj 				      | (opt_info
61838fd1498Szrj 					 ? DLTHE_RECORD_COPY_NUMBER
61938fd1498Szrj 					   : 0));
62038fd1498Szrj   gcc_assert (ok);
62138fd1498Szrj 
62238fd1498Szrj   if (opt_info)
62338fd1498Szrj     {
62438fd1498Szrj       apply_opt_in_copies (opt_info, max_unroll, true, true);
62538fd1498Szrj       free_opt_info (opt_info);
62638fd1498Szrj     }
62738fd1498Szrj 
62838fd1498Szrj   if (exit_at_end)
62938fd1498Szrj     {
63038fd1498Szrj       basic_block exit_block = get_bb_copy (desc->in_edge->src);
63138fd1498Szrj       /* Find a new in and out edge; they are in the last copy we have made.  */
63238fd1498Szrj 
63338fd1498Szrj       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
63438fd1498Szrj 	{
63538fd1498Szrj 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
63638fd1498Szrj 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
63738fd1498Szrj 	}
63838fd1498Szrj       else
63938fd1498Szrj 	{
64038fd1498Szrj 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
64138fd1498Szrj 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
64238fd1498Szrj 	}
64338fd1498Szrj     }
64438fd1498Szrj 
64538fd1498Szrj   desc->niter /= max_unroll + 1;
64638fd1498Szrj   loop->nb_iterations_upper_bound
64738fd1498Szrj     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
64838fd1498Szrj   if (loop->any_estimate)
64938fd1498Szrj     loop->nb_iterations_estimate
65038fd1498Szrj       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
65138fd1498Szrj   if (loop->any_likely_upper_bound)
65238fd1498Szrj     loop->nb_iterations_likely_upper_bound
65338fd1498Szrj       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
65438fd1498Szrj   desc->niter_expr = GEN_INT (desc->niter);
65538fd1498Szrj 
65638fd1498Szrj   /* Remove the edges.  */
65738fd1498Szrj   FOR_EACH_VEC_ELT (remove_edges, i, e)
65838fd1498Szrj     remove_path (e);
65938fd1498Szrj 
66038fd1498Szrj   if (dump_file)
66138fd1498Szrj     fprintf (dump_file,
66238fd1498Szrj 	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
66338fd1498Szrj 	     max_unroll, num_loop_insns (loop));
66438fd1498Szrj }
66538fd1498Szrj 
66638fd1498Szrj /* Decide whether to unroll LOOP iterating runtime computable number of times
66738fd1498Szrj    and how much.  */
66838fd1498Szrj static void
decide_unroll_runtime_iterations(struct loop * loop,int flags)66938fd1498Szrj decide_unroll_runtime_iterations (struct loop *loop, int flags)
67038fd1498Szrj {
67138fd1498Szrj   unsigned nunroll, nunroll_by_av, i;
67238fd1498Szrj   struct niter_desc *desc;
67338fd1498Szrj   widest_int iterations;
67438fd1498Szrj 
67538fd1498Szrj   /* If we were not asked to unroll this loop, just return back silently.  */
67638fd1498Szrj   if (!(flags & UAP_UNROLL) && !loop->unroll)
67738fd1498Szrj     return;
67838fd1498Szrj 
67938fd1498Szrj   if (dump_enabled_p ())
68038fd1498Szrj     dump_printf (MSG_NOTE,
68138fd1498Szrj 		 "considering unrolling loop with runtime-"
68238fd1498Szrj 		 "computable number of iterations\n");
68338fd1498Szrj 
68438fd1498Szrj   /* nunroll = total number of copies of the original loop body in
68538fd1498Szrj      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
68638fd1498Szrj   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
68738fd1498Szrj   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
68838fd1498Szrj   if (nunroll > nunroll_by_av)
68938fd1498Szrj     nunroll = nunroll_by_av;
69038fd1498Szrj   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
69138fd1498Szrj     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
69238fd1498Szrj 
69338fd1498Szrj   if (targetm.loop_unroll_adjust)
69438fd1498Szrj     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
69538fd1498Szrj 
69638fd1498Szrj   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
69738fd1498Szrj     nunroll = loop->unroll;
69838fd1498Szrj 
69938fd1498Szrj   /* Skip big loops.  */
70038fd1498Szrj   if (nunroll <= 1)
70138fd1498Szrj     {
70238fd1498Szrj       if (dump_file)
70338fd1498Szrj 	fprintf (dump_file, ";; Not considering loop, is too big\n");
70438fd1498Szrj       return;
70538fd1498Szrj     }
70638fd1498Szrj 
70738fd1498Szrj   /* Check for simple loops.  */
70838fd1498Szrj   desc = get_simple_loop_desc (loop);
70938fd1498Szrj 
71038fd1498Szrj   /* Check simpleness.  */
71138fd1498Szrj   if (!desc->simple_p || desc->assumptions)
71238fd1498Szrj     {
71338fd1498Szrj       if (dump_file)
71438fd1498Szrj 	fprintf (dump_file,
71538fd1498Szrj 		 ";; Unable to prove that the number of iterations "
71638fd1498Szrj 		 "can be counted in runtime\n");
71738fd1498Szrj       return;
71838fd1498Szrj     }
71938fd1498Szrj 
72038fd1498Szrj   if (desc->const_iter)
72138fd1498Szrj     {
72238fd1498Szrj       if (dump_file)
72338fd1498Szrj 	fprintf (dump_file, ";; Loop iterates constant times\n");
72438fd1498Szrj       return;
72538fd1498Szrj     }
72638fd1498Szrj 
72738fd1498Szrj   /* Check whether the loop rolls.  */
72838fd1498Szrj   if ((get_estimated_loop_iterations (loop, &iterations)
72938fd1498Szrj        || get_likely_max_loop_iterations (loop, &iterations))
73038fd1498Szrj       && wi::ltu_p (iterations, 2 * nunroll))
73138fd1498Szrj     {
73238fd1498Szrj       if (dump_file)
73338fd1498Szrj 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
73438fd1498Szrj       return;
73538fd1498Szrj     }
73638fd1498Szrj 
73738fd1498Szrj   /* Success; now force nunroll to be power of 2, as code-gen
73838fd1498Szrj      requires it, we are unable to cope with overflows in
73938fd1498Szrj      computation of number of iterations.  */
74038fd1498Szrj   for (i = 1; 2 * i <= nunroll; i *= 2)
74138fd1498Szrj     continue;
74238fd1498Szrj 
74338fd1498Szrj   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
74438fd1498Szrj   loop->lpt_decision.times = i - 1;
74538fd1498Szrj }
74638fd1498Szrj 
74738fd1498Szrj /* Splits edge E and inserts the sequence of instructions INSNS on it, and
74838fd1498Szrj    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
74938fd1498Szrj    and NULL is returned instead.  */
75038fd1498Szrj 
75138fd1498Szrj basic_block
split_edge_and_insert(edge e,rtx_insn * insns)75238fd1498Szrj split_edge_and_insert (edge e, rtx_insn *insns)
75338fd1498Szrj {
75438fd1498Szrj   basic_block bb;
75538fd1498Szrj 
75638fd1498Szrj   if (!insns)
75738fd1498Szrj     return NULL;
75838fd1498Szrj   bb = split_edge (e);
75938fd1498Szrj   emit_insn_after (insns, BB_END (bb));
76038fd1498Szrj 
76138fd1498Szrj   /* ??? We used to assume that INSNS can contain control flow insns, and
76238fd1498Szrj      that we had to try to find sub basic blocks in BB to maintain a valid
76338fd1498Szrj      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
76438fd1498Szrj      and call break_superblocks when going out of cfglayout mode.  But it
76538fd1498Szrj      turns out that this never happens; and that if it does ever happen,
76638fd1498Szrj      the verify_flow_info at the end of the RTL loop passes would fail.
76738fd1498Szrj 
76838fd1498Szrj      There are two reasons why we expected we could have control flow insns
76938fd1498Szrj      in INSNS.  The first is when a comparison has to be done in parts, and
77038fd1498Szrj      the second is when the number of iterations is computed for loops with
77138fd1498Szrj      the number of iterations known at runtime.  In both cases, test cases
77238fd1498Szrj      to get control flow in INSNS appear to be impossible to construct:
77338fd1498Szrj 
77438fd1498Szrj       * If do_compare_rtx_and_jump needs several branches to do comparison
77538fd1498Szrj 	in a mode that needs comparison by parts, we cannot analyze the
77638fd1498Szrj 	number of iterations of the loop, and we never get to unrolling it.
77738fd1498Szrj 
77838fd1498Szrj       * The code in expand_divmod that was suspected to cause creation of
77938fd1498Szrj 	branching code seems to be only accessed for signed division.  The
78038fd1498Szrj 	divisions used by # of iterations analysis are always unsigned.
78138fd1498Szrj 	Problems might arise on architectures that emits branching code
78238fd1498Szrj 	for some operations that may appear in the unroller (especially
78338fd1498Szrj 	for division), but we have no such architectures.
78438fd1498Szrj 
78538fd1498Szrj      Considering all this, it was decided that we should for now assume
78638fd1498Szrj      that INSNS can in theory contain control flow insns, but in practice
78738fd1498Szrj      it never does.  So we don't handle the theoretical case, and should
78838fd1498Szrj      a real failure ever show up, we have a pretty good clue for how to
78938fd1498Szrj      fix it.  */
79038fd1498Szrj 
79138fd1498Szrj   return bb;
79238fd1498Szrj }
79338fd1498Szrj 
79438fd1498Szrj /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
79538fd1498Szrj    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
79638fd1498Szrj    in order to create a jump.  */
79738fd1498Szrj 
79838fd1498Szrj static rtx_insn *
compare_and_jump_seq(rtx op0,rtx op1,enum rtx_code comp,rtx_code_label * label,profile_probability prob,rtx_insn * cinsn)79938fd1498Szrj compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
80038fd1498Szrj 		      rtx_code_label *label, profile_probability prob,
80138fd1498Szrj 		      rtx_insn *cinsn)
80238fd1498Szrj {
80338fd1498Szrj   rtx_insn *seq;
80438fd1498Szrj   rtx_jump_insn *jump;
80538fd1498Szrj   rtx cond;
80638fd1498Szrj   machine_mode mode;
80738fd1498Szrj 
80838fd1498Szrj   mode = GET_MODE (op0);
80938fd1498Szrj   if (mode == VOIDmode)
81038fd1498Szrj     mode = GET_MODE (op1);
81138fd1498Szrj 
81238fd1498Szrj   start_sequence ();
81338fd1498Szrj   if (GET_MODE_CLASS (mode) == MODE_CC)
81438fd1498Szrj     {
81538fd1498Szrj       /* A hack -- there seems to be no easy generic way how to make a
81638fd1498Szrj 	 conditional jump from a ccmode comparison.  */
81738fd1498Szrj       gcc_assert (cinsn);
81838fd1498Szrj       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
81938fd1498Szrj       gcc_assert (GET_CODE (cond) == comp);
82038fd1498Szrj       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
82138fd1498Szrj       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
82238fd1498Szrj       emit_jump_insn (copy_insn (PATTERN (cinsn)));
82338fd1498Szrj       jump = as_a <rtx_jump_insn *> (get_last_insn ());
82438fd1498Szrj       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
82538fd1498Szrj       LABEL_NUSES (JUMP_LABEL (jump))++;
82638fd1498Szrj       redirect_jump (jump, label, 0);
82738fd1498Szrj     }
82838fd1498Szrj   else
82938fd1498Szrj     {
83038fd1498Szrj       gcc_assert (!cinsn);
83138fd1498Szrj 
83238fd1498Szrj       op0 = force_operand (op0, NULL_RTX);
83338fd1498Szrj       op1 = force_operand (op1, NULL_RTX);
83438fd1498Szrj       do_compare_rtx_and_jump (op0, op1, comp, 0,
83538fd1498Szrj 			       mode, NULL_RTX, NULL, label,
83638fd1498Szrj 			       profile_probability::uninitialized ());
83738fd1498Szrj       jump = as_a <rtx_jump_insn *> (get_last_insn ());
83838fd1498Szrj       jump->set_jump_target (label);
83938fd1498Szrj       LABEL_NUSES (label)++;
84038fd1498Szrj     }
84138fd1498Szrj   if (prob.initialized_p ())
84238fd1498Szrj     add_reg_br_prob_note (jump, prob);
84338fd1498Szrj 
84438fd1498Szrj   seq = get_insns ();
84538fd1498Szrj   end_sequence ();
84638fd1498Szrj 
84738fd1498Szrj   return seq;
84838fd1498Szrj }
84938fd1498Szrj 
85038fd1498Szrj /* Unroll LOOP for which we are able to count number of iterations in
85138fd1498Szrj    runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
85238fd1498Szrj    power of two.  The transformation does this (with some extra care
85338fd1498Szrj    for case n < 0):
85438fd1498Szrj 
85538fd1498Szrj    for (i = 0; i < n; i++)
85638fd1498Szrj      body;
85738fd1498Szrj 
85838fd1498Szrj    ==>  (LOOP->LPT_DECISION.TIMES == 3)
85938fd1498Szrj 
86038fd1498Szrj    i = 0;
86138fd1498Szrj    mod = n % 4;
86238fd1498Szrj 
86338fd1498Szrj    switch (mod)
86438fd1498Szrj      {
86538fd1498Szrj        case 3:
86638fd1498Szrj          body; i++;
86738fd1498Szrj        case 2:
86838fd1498Szrj          body; i++;
86938fd1498Szrj        case 1:
87038fd1498Szrj          body; i++;
87138fd1498Szrj        case 0: ;
87238fd1498Szrj      }
87338fd1498Szrj 
87438fd1498Szrj    while (i < n)
87538fd1498Szrj      {
87638fd1498Szrj        body; i++;
87738fd1498Szrj        body; i++;
87838fd1498Szrj        body; i++;
87938fd1498Szrj        body; i++;
88038fd1498Szrj      }
88138fd1498Szrj    */
88238fd1498Szrj static void
unroll_loop_runtime_iterations(struct loop * loop)88338fd1498Szrj unroll_loop_runtime_iterations (struct loop *loop)
88438fd1498Szrj {
88538fd1498Szrj   rtx old_niter, niter, tmp;
88638fd1498Szrj   rtx_insn *init_code, *branch_code;
88738fd1498Szrj   unsigned i, j;
88838fd1498Szrj   profile_probability p;
88938fd1498Szrj   basic_block preheader, *body, swtch, ezc_swtch = NULL;
89038fd1498Szrj   int may_exit_copy;
89138fd1498Szrj   profile_count iter_count, new_count;
89238fd1498Szrj   unsigned n_peel;
89338fd1498Szrj   edge e;
89438fd1498Szrj   bool extra_zero_check, last_may_exit;
89538fd1498Szrj   unsigned max_unroll = loop->lpt_decision.times;
89638fd1498Szrj   struct niter_desc *desc = get_simple_loop_desc (loop);
89738fd1498Szrj   bool exit_at_end = loop_exit_at_end_p (loop);
89838fd1498Szrj   struct opt_info *opt_info = NULL;
89938fd1498Szrj   bool ok;
90038fd1498Szrj 
90138fd1498Szrj   if (flag_split_ivs_in_unroller
90238fd1498Szrj       || flag_variable_expansion_in_unroller)
90338fd1498Szrj     opt_info = analyze_insns_in_loop (loop);
90438fd1498Szrj 
90538fd1498Szrj   /* Remember blocks whose dominators will have to be updated.  */
90638fd1498Szrj   auto_vec<basic_block> dom_bbs;
90738fd1498Szrj 
90838fd1498Szrj   body = get_loop_body (loop);
90938fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
91038fd1498Szrj     {
91138fd1498Szrj       vec<basic_block> ldom;
91238fd1498Szrj       basic_block bb;
91338fd1498Szrj 
91438fd1498Szrj       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
91538fd1498Szrj       FOR_EACH_VEC_ELT (ldom, j, bb)
91638fd1498Szrj 	if (!flow_bb_inside_loop_p (loop, bb))
91738fd1498Szrj 	  dom_bbs.safe_push (bb);
91838fd1498Szrj 
91938fd1498Szrj       ldom.release ();
92038fd1498Szrj     }
92138fd1498Szrj   free (body);
92238fd1498Szrj 
92338fd1498Szrj   if (!exit_at_end)
92438fd1498Szrj     {
92538fd1498Szrj       /* Leave exit in first copy (for explanation why see comment in
92638fd1498Szrj 	 unroll_loop_constant_iterations).  */
92738fd1498Szrj       may_exit_copy = 0;
92838fd1498Szrj       n_peel = max_unroll - 1;
92938fd1498Szrj       extra_zero_check = true;
93038fd1498Szrj       last_may_exit = false;
93138fd1498Szrj     }
93238fd1498Szrj   else
93338fd1498Szrj     {
93438fd1498Szrj       /* Leave exit in last copy (for explanation why see comment in
93538fd1498Szrj 	 unroll_loop_constant_iterations).  */
93638fd1498Szrj       may_exit_copy = max_unroll;
93738fd1498Szrj       n_peel = max_unroll;
93838fd1498Szrj       extra_zero_check = false;
93938fd1498Szrj       last_may_exit = true;
94038fd1498Szrj     }
94138fd1498Szrj 
94238fd1498Szrj   /* Get expression for number of iterations.  */
94338fd1498Szrj   start_sequence ();
94438fd1498Szrj   old_niter = niter = gen_reg_rtx (desc->mode);
94538fd1498Szrj   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
94638fd1498Szrj   if (tmp != niter)
94738fd1498Szrj     emit_move_insn (niter, tmp);
94838fd1498Szrj 
94938fd1498Szrj   /* For loops that exit at end and whose number of iterations is reliable,
95038fd1498Szrj      add one to niter to account for first pass through loop body before
95138fd1498Szrj      reaching exit test. */
95238fd1498Szrj   if (exit_at_end && !desc->noloop_assumptions)
95338fd1498Szrj     {
95438fd1498Szrj       niter = expand_simple_binop (desc->mode, PLUS,
95538fd1498Szrj 				   niter, const1_rtx,
95638fd1498Szrj 				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
95738fd1498Szrj       old_niter = niter;
95838fd1498Szrj     }
95938fd1498Szrj 
96038fd1498Szrj   /* Count modulo by ANDing it with max_unroll; we use the fact that
96138fd1498Szrj      the number of unrollings is a power of two, and thus this is correct
96238fd1498Szrj      even if there is overflow in the computation.  */
96338fd1498Szrj   niter = expand_simple_binop (desc->mode, AND,
96438fd1498Szrj 			       niter, gen_int_mode (max_unroll, desc->mode),
96538fd1498Szrj 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
96638fd1498Szrj 
96738fd1498Szrj   init_code = get_insns ();
96838fd1498Szrj   end_sequence ();
96938fd1498Szrj   unshare_all_rtl_in_chain (init_code);
97038fd1498Szrj 
97138fd1498Szrj   /* Precondition the loop.  */
97238fd1498Szrj   split_edge_and_insert (loop_preheader_edge (loop), init_code);
97338fd1498Szrj 
97438fd1498Szrj   auto_vec<edge> remove_edges;
97538fd1498Szrj 
97638fd1498Szrj   auto_sbitmap wont_exit (max_unroll + 2);
97738fd1498Szrj 
97838fd1498Szrj   if (extra_zero_check || desc->noloop_assumptions)
97938fd1498Szrj     {
98038fd1498Szrj       /* Peel the first copy of loop body.  Leave the exit test if the number
98138fd1498Szrj 	 of iterations is not reliable.  Also record the place of the extra zero
98238fd1498Szrj 	 check.  */
98338fd1498Szrj       bitmap_clear (wont_exit);
98438fd1498Szrj       if (!desc->noloop_assumptions)
98538fd1498Szrj 	bitmap_set_bit (wont_exit, 1);
98638fd1498Szrj       ezc_swtch = loop_preheader_edge (loop)->src;
98738fd1498Szrj       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
98838fd1498Szrj 					  1, wont_exit, desc->out_edge,
98938fd1498Szrj 					  &remove_edges,
99038fd1498Szrj 					  DLTHE_FLAG_UPDATE_FREQ);
99138fd1498Szrj       gcc_assert (ok);
99238fd1498Szrj     }
99338fd1498Szrj 
99438fd1498Szrj   /* Record the place where switch will be built for preconditioning.  */
99538fd1498Szrj   swtch = split_edge (loop_preheader_edge (loop));
99638fd1498Szrj 
99738fd1498Szrj   /* Compute count increments for each switch block and initialize
99838fd1498Szrj      innermost switch block.  Switch blocks and peeled loop copies are built
99938fd1498Szrj      from innermost outward.  */
100038fd1498Szrj   iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
100138fd1498Szrj   swtch->count = new_count;
100238fd1498Szrj 
100338fd1498Szrj   for (i = 0; i < n_peel; i++)
100438fd1498Szrj     {
100538fd1498Szrj       /* Peel the copy.  */
100638fd1498Szrj       bitmap_clear (wont_exit);
100738fd1498Szrj       if (i != n_peel - 1 || !last_may_exit)
100838fd1498Szrj 	bitmap_set_bit (wont_exit, 1);
100938fd1498Szrj       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
101038fd1498Szrj 					  1, wont_exit, desc->out_edge,
101138fd1498Szrj 					  &remove_edges,
101238fd1498Szrj 					  DLTHE_FLAG_UPDATE_FREQ);
101338fd1498Szrj       gcc_assert (ok);
101438fd1498Szrj 
101538fd1498Szrj       /* Create item for switch.  */
101638fd1498Szrj       j = n_peel - i - (extra_zero_check ? 0 : 1);
101738fd1498Szrj       p = profile_probability::always ().apply_scale (1, i + 2);
101838fd1498Szrj 
101938fd1498Szrj       preheader = split_edge (loop_preheader_edge (loop));
102038fd1498Szrj       /* Add in count of edge from switch block.  */
102138fd1498Szrj       preheader->count += iter_count;
102238fd1498Szrj       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
102338fd1498Szrj 					  block_label (preheader), p,
102438fd1498Szrj 					  NULL);
102538fd1498Szrj 
102638fd1498Szrj       /* We rely on the fact that the compare and jump cannot be optimized out,
102738fd1498Szrj 	 and hence the cfg we create is correct.  */
102838fd1498Szrj       gcc_assert (branch_code != NULL_RTX);
102938fd1498Szrj 
103038fd1498Szrj       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
103138fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
103238fd1498Szrj       single_succ_edge (swtch)->probability = p.invert ();
103338fd1498Szrj       new_count += iter_count;
103438fd1498Szrj       swtch->count = new_count;
103538fd1498Szrj       e = make_edge (swtch, preheader,
103638fd1498Szrj 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
103738fd1498Szrj       e->probability = p;
103838fd1498Szrj     }
103938fd1498Szrj 
104038fd1498Szrj   if (extra_zero_check)
104138fd1498Szrj     {
104238fd1498Szrj       /* Add branch for zero iterations.  */
104338fd1498Szrj       p = profile_probability::always ().apply_scale (1, max_unroll + 1);
104438fd1498Szrj       swtch = ezc_swtch;
104538fd1498Szrj       preheader = split_edge (loop_preheader_edge (loop));
104638fd1498Szrj       /* Recompute count adjustments since initial peel copy may
104738fd1498Szrj 	 have exited and reduced those values that were computed above.  */
104838fd1498Szrj       iter_count = swtch->count.apply_scale (1, max_unroll + 1);
104938fd1498Szrj       /* Add in count of edge from switch block.  */
105038fd1498Szrj       preheader->count += iter_count;
105138fd1498Szrj       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
105238fd1498Szrj 					  block_label (preheader), p,
105338fd1498Szrj 					  NULL);
105438fd1498Szrj       gcc_assert (branch_code != NULL_RTX);
105538fd1498Szrj 
105638fd1498Szrj       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
105738fd1498Szrj       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
105838fd1498Szrj       single_succ_edge (swtch)->probability = p.invert ();
105938fd1498Szrj       e = make_edge (swtch, preheader,
106038fd1498Szrj 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
106138fd1498Szrj       e->probability = p;
106238fd1498Szrj     }
106338fd1498Szrj 
106438fd1498Szrj   /* Recount dominators for outer blocks.  */
106538fd1498Szrj   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
106638fd1498Szrj 
106738fd1498Szrj   /* And unroll loop.  */
106838fd1498Szrj 
106938fd1498Szrj   bitmap_ones (wont_exit);
107038fd1498Szrj   bitmap_clear_bit (wont_exit, may_exit_copy);
107138fd1498Szrj   opt_info_start_duplication (opt_info);
107238fd1498Szrj 
107338fd1498Szrj   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
107438fd1498Szrj 				      max_unroll,
107538fd1498Szrj 				      wont_exit, desc->out_edge,
107638fd1498Szrj 				      &remove_edges,
107738fd1498Szrj 				      DLTHE_FLAG_UPDATE_FREQ
107838fd1498Szrj 				      | (opt_info
107938fd1498Szrj 					 ? DLTHE_RECORD_COPY_NUMBER
108038fd1498Szrj 					   : 0));
108138fd1498Szrj   gcc_assert (ok);
108238fd1498Szrj 
108338fd1498Szrj   if (opt_info)
108438fd1498Szrj     {
108538fd1498Szrj       apply_opt_in_copies (opt_info, max_unroll, true, true);
108638fd1498Szrj       free_opt_info (opt_info);
108738fd1498Szrj     }
108838fd1498Szrj 
108938fd1498Szrj   if (exit_at_end)
109038fd1498Szrj     {
109138fd1498Szrj       basic_block exit_block = get_bb_copy (desc->in_edge->src);
109238fd1498Szrj       /* Find a new in and out edge; they are in the last copy we have
109338fd1498Szrj 	 made.  */
109438fd1498Szrj 
109538fd1498Szrj       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
109638fd1498Szrj 	{
109738fd1498Szrj 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
109838fd1498Szrj 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
109938fd1498Szrj 	}
110038fd1498Szrj       else
110138fd1498Szrj 	{
110238fd1498Szrj 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
110338fd1498Szrj 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
110438fd1498Szrj 	}
110538fd1498Szrj     }
110638fd1498Szrj 
110738fd1498Szrj   /* Remove the edges.  */
110838fd1498Szrj   FOR_EACH_VEC_ELT (remove_edges, i, e)
110938fd1498Szrj     remove_path (e);
111038fd1498Szrj 
111138fd1498Szrj   /* We must be careful when updating the number of iterations due to
111238fd1498Szrj      preconditioning and the fact that the value must be valid at entry
111338fd1498Szrj      of the loop.  After passing through the above code, we see that
111438fd1498Szrj      the correct new number of iterations is this:  */
111538fd1498Szrj   gcc_assert (!desc->const_iter);
111638fd1498Szrj   desc->niter_expr =
111738fd1498Szrj     simplify_gen_binary (UDIV, desc->mode, old_niter,
111838fd1498Szrj 			 gen_int_mode (max_unroll + 1, desc->mode));
111938fd1498Szrj   loop->nb_iterations_upper_bound
112038fd1498Szrj     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
112138fd1498Szrj   if (loop->any_estimate)
112238fd1498Szrj     loop->nb_iterations_estimate
112338fd1498Szrj       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
112438fd1498Szrj   if (loop->any_likely_upper_bound)
112538fd1498Szrj     loop->nb_iterations_likely_upper_bound
112638fd1498Szrj       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
112738fd1498Szrj   if (exit_at_end)
112838fd1498Szrj     {
112938fd1498Szrj       desc->niter_expr =
113038fd1498Szrj 	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
113138fd1498Szrj       desc->noloop_assumptions = NULL_RTX;
113238fd1498Szrj       --loop->nb_iterations_upper_bound;
113338fd1498Szrj       if (loop->any_estimate
113438fd1498Szrj 	  && loop->nb_iterations_estimate != 0)
113538fd1498Szrj 	--loop->nb_iterations_estimate;
113638fd1498Szrj       else
113738fd1498Szrj 	loop->any_estimate = false;
113838fd1498Szrj       if (loop->any_likely_upper_bound
113938fd1498Szrj 	  && loop->nb_iterations_likely_upper_bound != 0)
114038fd1498Szrj 	--loop->nb_iterations_likely_upper_bound;
114138fd1498Szrj       else
114238fd1498Szrj 	loop->any_likely_upper_bound = false;
114338fd1498Szrj     }
114438fd1498Szrj 
114538fd1498Szrj   if (dump_file)
114638fd1498Szrj     fprintf (dump_file,
114738fd1498Szrj 	     ";; Unrolled loop %d times, counting # of iterations "
114838fd1498Szrj 	     "in runtime, %i insns\n",
114938fd1498Szrj 	     max_unroll, num_loop_insns (loop));
115038fd1498Szrj }
115138fd1498Szrj 
115238fd1498Szrj /* Decide whether to unroll LOOP stupidly and how much.  */
115338fd1498Szrj static void
decide_unroll_stupid(struct loop * loop,int flags)115438fd1498Szrj decide_unroll_stupid (struct loop *loop, int flags)
115538fd1498Szrj {
115638fd1498Szrj   unsigned nunroll, nunroll_by_av, i;
115738fd1498Szrj   struct niter_desc *desc;
115838fd1498Szrj   widest_int iterations;
115938fd1498Szrj 
116038fd1498Szrj   /* If we were not asked to unroll this loop, just return back silently.  */
116138fd1498Szrj   if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
116238fd1498Szrj     return;
116338fd1498Szrj 
116438fd1498Szrj   if (dump_enabled_p ())
116538fd1498Szrj     dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
116638fd1498Szrj 
116738fd1498Szrj   /* nunroll = total number of copies of the original loop body in
116838fd1498Szrj      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
116938fd1498Szrj   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
117038fd1498Szrj   nunroll_by_av
117138fd1498Szrj     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
117238fd1498Szrj   if (nunroll > nunroll_by_av)
117338fd1498Szrj     nunroll = nunroll_by_av;
117438fd1498Szrj   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
117538fd1498Szrj     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
117638fd1498Szrj 
117738fd1498Szrj   if (targetm.loop_unroll_adjust)
117838fd1498Szrj     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
117938fd1498Szrj 
118038fd1498Szrj   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
118138fd1498Szrj     nunroll = loop->unroll;
118238fd1498Szrj 
118338fd1498Szrj   /* Skip big loops.  */
118438fd1498Szrj   if (nunroll <= 1)
118538fd1498Szrj     {
118638fd1498Szrj       if (dump_file)
118738fd1498Szrj 	fprintf (dump_file, ";; Not considering loop, is too big\n");
118838fd1498Szrj       return;
118938fd1498Szrj     }
119038fd1498Szrj 
119138fd1498Szrj   /* Check for simple loops.  */
119238fd1498Szrj   desc = get_simple_loop_desc (loop);
119338fd1498Szrj 
119438fd1498Szrj   /* Check simpleness.  */
119538fd1498Szrj   if (desc->simple_p && !desc->assumptions)
119638fd1498Szrj     {
119738fd1498Szrj       if (dump_file)
119838fd1498Szrj 	fprintf (dump_file, ";; Loop is simple\n");
119938fd1498Szrj       return;
120038fd1498Szrj     }
120138fd1498Szrj 
120238fd1498Szrj   /* Do not unroll loops with branches inside -- it increases number
120338fd1498Szrj      of mispredicts.
120438fd1498Szrj      TODO: this heuristic needs tunning; call inside the loop body
120538fd1498Szrj      is also relatively good reason to not unroll.  */
120638fd1498Szrj   if (num_loop_branches (loop) > 1)
120738fd1498Szrj     {
120838fd1498Szrj       if (dump_file)
120938fd1498Szrj 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
121038fd1498Szrj       return;
121138fd1498Szrj     }
121238fd1498Szrj 
121338fd1498Szrj   /* Check whether the loop rolls.  */
121438fd1498Szrj   if ((get_estimated_loop_iterations (loop, &iterations)
121538fd1498Szrj        || get_likely_max_loop_iterations (loop, &iterations))
121638fd1498Szrj       && wi::ltu_p (iterations, 2 * nunroll))
121738fd1498Szrj     {
121838fd1498Szrj       if (dump_file)
121938fd1498Szrj 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
122038fd1498Szrj       return;
122138fd1498Szrj     }
122238fd1498Szrj 
122338fd1498Szrj   /* Success.  Now force nunroll to be power of 2, as it seems that this
122438fd1498Szrj      improves results (partially because of better alignments, partially
122538fd1498Szrj      because of some dark magic).  */
122638fd1498Szrj   for (i = 1; 2 * i <= nunroll; i *= 2)
122738fd1498Szrj     continue;
122838fd1498Szrj 
122938fd1498Szrj   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
123038fd1498Szrj   loop->lpt_decision.times = i - 1;
123138fd1498Szrj }
123238fd1498Szrj 
123338fd1498Szrj /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
123438fd1498Szrj 
123538fd1498Szrj    while (cond)
123638fd1498Szrj      body;
123738fd1498Szrj 
123838fd1498Szrj    ==>  (LOOP->LPT_DECISION.TIMES == 3)
123938fd1498Szrj 
124038fd1498Szrj    while (cond)
124138fd1498Szrj      {
124238fd1498Szrj        body;
124338fd1498Szrj        if (!cond) break;
124438fd1498Szrj        body;
124538fd1498Szrj        if (!cond) break;
124638fd1498Szrj        body;
124738fd1498Szrj        if (!cond) break;
124838fd1498Szrj        body;
124938fd1498Szrj      }
125038fd1498Szrj    */
125138fd1498Szrj static void
unroll_loop_stupid(struct loop * loop)125238fd1498Szrj unroll_loop_stupid (struct loop *loop)
125338fd1498Szrj {
125438fd1498Szrj   unsigned nunroll = loop->lpt_decision.times;
125538fd1498Szrj   struct niter_desc *desc = get_simple_loop_desc (loop);
125638fd1498Szrj   struct opt_info *opt_info = NULL;
125738fd1498Szrj   bool ok;
125838fd1498Szrj 
125938fd1498Szrj   if (flag_split_ivs_in_unroller
126038fd1498Szrj       || flag_variable_expansion_in_unroller)
126138fd1498Szrj     opt_info = analyze_insns_in_loop (loop);
126238fd1498Szrj 
126338fd1498Szrj   auto_sbitmap wont_exit (nunroll + 1);
126438fd1498Szrj   bitmap_clear (wont_exit);
126538fd1498Szrj   opt_info_start_duplication (opt_info);
126638fd1498Szrj 
126738fd1498Szrj   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
126838fd1498Szrj 				      nunroll, wont_exit,
126938fd1498Szrj 				      NULL, NULL,
127038fd1498Szrj 				      DLTHE_FLAG_UPDATE_FREQ
127138fd1498Szrj 				      | (opt_info
127238fd1498Szrj 					 ? DLTHE_RECORD_COPY_NUMBER
127338fd1498Szrj 					   : 0));
127438fd1498Szrj   gcc_assert (ok);
127538fd1498Szrj 
127638fd1498Szrj   if (opt_info)
127738fd1498Szrj     {
127838fd1498Szrj       apply_opt_in_copies (opt_info, nunroll, true, true);
127938fd1498Szrj       free_opt_info (opt_info);
128038fd1498Szrj     }
128138fd1498Szrj 
128238fd1498Szrj   if (desc->simple_p)
128338fd1498Szrj     {
128438fd1498Szrj       /* We indeed may get here provided that there are nontrivial assumptions
128538fd1498Szrj 	 for a loop to be really simple.  We could update the counts, but the
128638fd1498Szrj 	 problem is that we are unable to decide which exit will be taken
128738fd1498Szrj 	 (not really true in case the number of iterations is constant,
128838fd1498Szrj 	 but no one will do anything with this information, so we do not
128938fd1498Szrj 	 worry about it).  */
129038fd1498Szrj       desc->simple_p = false;
129138fd1498Szrj     }
129238fd1498Szrj 
129338fd1498Szrj   if (dump_file)
129438fd1498Szrj     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
129538fd1498Szrj 	     nunroll, num_loop_insns (loop));
129638fd1498Szrj }
129738fd1498Szrj 
129838fd1498Szrj /* Returns true if REG is referenced in one nondebug insn in LOOP.
129938fd1498Szrj    Set *DEBUG_USES to the number of debug insns that reference the
130038fd1498Szrj    variable.  */
130138fd1498Szrj 
130238fd1498Szrj static bool
referenced_in_one_insn_in_loop_p(struct loop * loop,rtx reg,int * debug_uses)130338fd1498Szrj referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
130438fd1498Szrj 				  int *debug_uses)
130538fd1498Szrj {
130638fd1498Szrj   basic_block *body, bb;
130738fd1498Szrj   unsigned i;
130838fd1498Szrj   int count_ref = 0;
130938fd1498Szrj   rtx_insn *insn;
131038fd1498Szrj 
131138fd1498Szrj   body = get_loop_body (loop);
131238fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
131338fd1498Szrj     {
131438fd1498Szrj       bb = body[i];
131538fd1498Szrj 
131638fd1498Szrj       FOR_BB_INSNS (bb, insn)
131738fd1498Szrj 	if (!rtx_referenced_p (reg, insn))
131838fd1498Szrj 	  continue;
131938fd1498Szrj 	else if (DEBUG_INSN_P (insn))
132038fd1498Szrj 	  ++*debug_uses;
132138fd1498Szrj 	else if (++count_ref > 1)
132238fd1498Szrj 	  break;
132338fd1498Szrj     }
132438fd1498Szrj   free (body);
132538fd1498Szrj   return (count_ref  == 1);
132638fd1498Szrj }
132738fd1498Szrj 
132838fd1498Szrj /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
132938fd1498Szrj 
133038fd1498Szrj static void
reset_debug_uses_in_loop(struct loop * loop,rtx reg,int debug_uses)133138fd1498Szrj reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
133238fd1498Szrj {
133338fd1498Szrj   basic_block *body, bb;
133438fd1498Szrj   unsigned i;
133538fd1498Szrj   rtx_insn *insn;
133638fd1498Szrj 
133738fd1498Szrj   body = get_loop_body (loop);
133838fd1498Szrj   for (i = 0; debug_uses && i < loop->num_nodes; i++)
133938fd1498Szrj     {
134038fd1498Szrj       bb = body[i];
134138fd1498Szrj 
134238fd1498Szrj       FOR_BB_INSNS (bb, insn)
134338fd1498Szrj 	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
134438fd1498Szrj 	  continue;
134538fd1498Szrj 	else
134638fd1498Szrj 	  {
134738fd1498Szrj 	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
134838fd1498Szrj 			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
134938fd1498Szrj 	    if (!--debug_uses)
135038fd1498Szrj 	      break;
135138fd1498Szrj 	  }
135238fd1498Szrj     }
135338fd1498Szrj   free (body);
135438fd1498Szrj }
135538fd1498Szrj 
135638fd1498Szrj /* Determine whether INSN contains an accumulator
135738fd1498Szrj    which can be expanded into separate copies,
135838fd1498Szrj    one for each copy of the LOOP body.
135938fd1498Szrj 
136038fd1498Szrj    for (i = 0 ; i < n; i++)
136138fd1498Szrj      sum += a[i];
136238fd1498Szrj 
136338fd1498Szrj    ==>
136438fd1498Szrj 
136538fd1498Szrj    sum += a[i]
136638fd1498Szrj    ....
136738fd1498Szrj    i = i+1;
136838fd1498Szrj    sum1 += a[i]
136938fd1498Szrj    ....
137038fd1498Szrj    i = i+1
137138fd1498Szrj    sum2 += a[i];
137238fd1498Szrj    ....
137338fd1498Szrj 
137438fd1498Szrj    Return NULL if INSN contains no opportunity for expansion of accumulator.
137538fd1498Szrj    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
137638fd1498Szrj    information and return a pointer to it.
137738fd1498Szrj */
137838fd1498Szrj 
137938fd1498Szrj static struct var_to_expand *
analyze_insn_to_expand_var(struct loop * loop,rtx_insn * insn)138038fd1498Szrj analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
138138fd1498Szrj {
138238fd1498Szrj   rtx set, dest, src;
138338fd1498Szrj   struct var_to_expand *ves;
138438fd1498Szrj   unsigned accum_pos;
138538fd1498Szrj   enum rtx_code code;
138638fd1498Szrj   int debug_uses = 0;
138738fd1498Szrj 
138838fd1498Szrj   set = single_set (insn);
138938fd1498Szrj   if (!set)
139038fd1498Szrj     return NULL;
139138fd1498Szrj 
139238fd1498Szrj   dest = SET_DEST (set);
139338fd1498Szrj   src = SET_SRC (set);
139438fd1498Szrj   code = GET_CODE (src);
139538fd1498Szrj 
139638fd1498Szrj   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
139738fd1498Szrj     return NULL;
139838fd1498Szrj 
139938fd1498Szrj   if (FLOAT_MODE_P (GET_MODE (dest)))
140038fd1498Szrj     {
140138fd1498Szrj       if (!flag_associative_math)
140238fd1498Szrj         return NULL;
140338fd1498Szrj       /* In the case of FMA, we're also changing the rounding.  */
140438fd1498Szrj       if (code == FMA && !flag_unsafe_math_optimizations)
140538fd1498Szrj 	return NULL;
140638fd1498Szrj     }
140738fd1498Szrj 
140838fd1498Szrj   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
140938fd1498Szrj      in MD.  But if there is no optab to generate the insn, we can not
141038fd1498Szrj      perform the variable expansion.  This can happen if an MD provides
141138fd1498Szrj      an insn but not a named pattern to generate it, for example to avoid
141238fd1498Szrj      producing code that needs additional mode switches like for x87/mmx.
141338fd1498Szrj 
141438fd1498Szrj      So we check have_insn_for which looks for an optab for the operation
141538fd1498Szrj      in SRC.  If it doesn't exist, we can't perform the expansion even
141638fd1498Szrj      though INSN is valid.  */
141738fd1498Szrj   if (!have_insn_for (code, GET_MODE (src)))
141838fd1498Szrj     return NULL;
141938fd1498Szrj 
142038fd1498Szrj   if (!REG_P (dest)
142138fd1498Szrj       && !(GET_CODE (dest) == SUBREG
142238fd1498Szrj            && REG_P (SUBREG_REG (dest))))
142338fd1498Szrj     return NULL;
142438fd1498Szrj 
142538fd1498Szrj   /* Find the accumulator use within the operation.  */
142638fd1498Szrj   if (code == FMA)
142738fd1498Szrj     {
142838fd1498Szrj       /* We only support accumulation via FMA in the ADD position.  */
142938fd1498Szrj       if (!rtx_equal_p  (dest, XEXP (src, 2)))
143038fd1498Szrj 	return NULL;
143138fd1498Szrj       accum_pos = 2;
143238fd1498Szrj     }
143338fd1498Szrj   else if (rtx_equal_p (dest, XEXP (src, 0)))
143438fd1498Szrj     accum_pos = 0;
143538fd1498Szrj   else if (rtx_equal_p (dest, XEXP (src, 1)))
143638fd1498Szrj     {
143738fd1498Szrj       /* The method of expansion that we are using; which includes the
143838fd1498Szrj 	 initialization of the expansions with zero and the summation of
143938fd1498Szrj          the expansions at the end of the computation will yield wrong
144038fd1498Szrj 	 results for (x = something - x) thus avoid using it in that case.  */
144138fd1498Szrj       if (code == MINUS)
144238fd1498Szrj 	return NULL;
144338fd1498Szrj       accum_pos = 1;
144438fd1498Szrj     }
144538fd1498Szrj   else
144638fd1498Szrj     return NULL;
144738fd1498Szrj 
144838fd1498Szrj   /* It must not otherwise be used.  */
144938fd1498Szrj   if (code == FMA)
145038fd1498Szrj     {
145138fd1498Szrj       if (rtx_referenced_p (dest, XEXP (src, 0))
145238fd1498Szrj 	  || rtx_referenced_p (dest, XEXP (src, 1)))
145338fd1498Szrj 	return NULL;
145438fd1498Szrj     }
145538fd1498Szrj   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
145638fd1498Szrj     return NULL;
145738fd1498Szrj 
145838fd1498Szrj   /* It must be used in exactly one insn.  */
145938fd1498Szrj   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
146038fd1498Szrj     return NULL;
146138fd1498Szrj 
146238fd1498Szrj   if (dump_file)
146338fd1498Szrj     {
146438fd1498Szrj       fprintf (dump_file, "\n;; Expanding Accumulator ");
146538fd1498Szrj       print_rtl (dump_file, dest);
146638fd1498Szrj       fprintf (dump_file, "\n");
146738fd1498Szrj     }
146838fd1498Szrj 
146938fd1498Szrj   if (debug_uses)
147038fd1498Szrj     /* Instead of resetting the debug insns, we could replace each
147138fd1498Szrj        debug use in the loop with the sum or product of all expanded
147238fd1498Szrj        accumulators.  Since we'll only know of all expansions at the
147338fd1498Szrj        end, we'd have to keep track of which vars_to_expand a debug
147438fd1498Szrj        insn in the loop references, take note of each copy of the
147538fd1498Szrj        debug insn during unrolling, and when it's all done, compute
147638fd1498Szrj        the sum or product of each variable and adjust the original
147738fd1498Szrj        debug insn and each copy thereof.  What a pain!  */
147838fd1498Szrj     reset_debug_uses_in_loop (loop, dest, debug_uses);
147938fd1498Szrj 
148038fd1498Szrj   /* Record the accumulator to expand.  */
148138fd1498Szrj   ves = XNEW (struct var_to_expand);
148238fd1498Szrj   ves->insn = insn;
148338fd1498Szrj   ves->reg = copy_rtx (dest);
148438fd1498Szrj   ves->var_expansions.create (1);
148538fd1498Szrj   ves->next = NULL;
148638fd1498Szrj   ves->op = GET_CODE (src);
148738fd1498Szrj   ves->expansion_count = 0;
148838fd1498Szrj   ves->reuse_expansion = 0;
148938fd1498Szrj   return ves;
149038fd1498Szrj }
149138fd1498Szrj 
149238fd1498Szrj /* Determine whether there is an induction variable in INSN that
149338fd1498Szrj    we would like to split during unrolling.
149438fd1498Szrj 
149538fd1498Szrj    I.e. replace
149638fd1498Szrj 
149738fd1498Szrj    i = i + 1;
149838fd1498Szrj    ...
149938fd1498Szrj    i = i + 1;
150038fd1498Szrj    ...
150138fd1498Szrj    i = i + 1;
150238fd1498Szrj    ...
150338fd1498Szrj 
150438fd1498Szrj    type chains by
150538fd1498Szrj 
150638fd1498Szrj    i0 = i + 1
150738fd1498Szrj    ...
150838fd1498Szrj    i = i0 + 1
150938fd1498Szrj    ...
151038fd1498Szrj    i = i0 + 2
151138fd1498Szrj    ...
151238fd1498Szrj 
151338fd1498Szrj    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
151438fd1498Szrj    an IV_TO_SPLIT structure, fill it with the relevant information and return a
151538fd1498Szrj    pointer to it.  */
151638fd1498Szrj 
151738fd1498Szrj static struct iv_to_split *
analyze_iv_to_split_insn(rtx_insn * insn)151838fd1498Szrj analyze_iv_to_split_insn (rtx_insn *insn)
151938fd1498Szrj {
152038fd1498Szrj   rtx set, dest;
152138fd1498Szrj   struct rtx_iv iv;
152238fd1498Szrj   struct iv_to_split *ivts;
152338fd1498Szrj   scalar_int_mode mode;
152438fd1498Szrj   bool ok;
152538fd1498Szrj 
152638fd1498Szrj   /* For now we just split the basic induction variables.  Later this may be
152738fd1498Szrj      extended for example by selecting also addresses of memory references.  */
152838fd1498Szrj   set = single_set (insn);
152938fd1498Szrj   if (!set)
153038fd1498Szrj     return NULL;
153138fd1498Szrj 
153238fd1498Szrj   dest = SET_DEST (set);
153338fd1498Szrj   if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
153438fd1498Szrj     return NULL;
153538fd1498Szrj 
153638fd1498Szrj   if (!biv_p (insn, mode, dest))
153738fd1498Szrj     return NULL;
153838fd1498Szrj 
153938fd1498Szrj   ok = iv_analyze_result (insn, dest, &iv);
154038fd1498Szrj 
154138fd1498Szrj   /* This used to be an assert under the assumption that if biv_p returns
154238fd1498Szrj      true that iv_analyze_result must also return true.  However, that
154338fd1498Szrj      assumption is not strictly correct as evidenced by pr25569.
154438fd1498Szrj 
154538fd1498Szrj      Returning NULL when iv_analyze_result returns false is safe and
154638fd1498Szrj      avoids the problems in pr25569 until the iv_analyze_* routines
154738fd1498Szrj      can be fixed, which is apparently hard and time consuming
154838fd1498Szrj      according to their author.  */
154938fd1498Szrj   if (! ok)
155038fd1498Szrj     return NULL;
155138fd1498Szrj 
155238fd1498Szrj   if (iv.step == const0_rtx
155338fd1498Szrj       || iv.mode != iv.extend_mode)
155438fd1498Szrj     return NULL;
155538fd1498Szrj 
155638fd1498Szrj   /* Record the insn to split.  */
155738fd1498Szrj   ivts = XNEW (struct iv_to_split);
155838fd1498Szrj   ivts->insn = insn;
155938fd1498Szrj   ivts->orig_var = dest;
156038fd1498Szrj   ivts->base_var = NULL_RTX;
156138fd1498Szrj   ivts->step = iv.step;
156238fd1498Szrj   ivts->next = NULL;
156338fd1498Szrj 
156438fd1498Szrj   return ivts;
156538fd1498Szrj }
156638fd1498Szrj 
156738fd1498Szrj /* Determines which of insns in LOOP can be optimized.
156838fd1498Szrj    Return a OPT_INFO struct with the relevant hash tables filled
156938fd1498Szrj    with all insns to be optimized.  The FIRST_NEW_BLOCK field
157038fd1498Szrj    is undefined for the return value.  */
157138fd1498Szrj 
157238fd1498Szrj static struct opt_info *
analyze_insns_in_loop(struct loop * loop)157338fd1498Szrj analyze_insns_in_loop (struct loop *loop)
157438fd1498Szrj {
157538fd1498Szrj   basic_block *body, bb;
157638fd1498Szrj   unsigned i;
157738fd1498Szrj   struct opt_info *opt_info = XCNEW (struct opt_info);
157838fd1498Szrj   rtx_insn *insn;
157938fd1498Szrj   struct iv_to_split *ivts = NULL;
158038fd1498Szrj   struct var_to_expand *ves = NULL;
158138fd1498Szrj   iv_to_split **slot1;
158238fd1498Szrj   var_to_expand **slot2;
158338fd1498Szrj   vec<edge> edges = get_loop_exit_edges (loop);
158438fd1498Szrj   edge exit;
158538fd1498Szrj   bool can_apply = false;
158638fd1498Szrj 
158738fd1498Szrj   iv_analysis_loop_init (loop);
158838fd1498Szrj 
158938fd1498Szrj   body = get_loop_body (loop);
159038fd1498Szrj 
159138fd1498Szrj   if (flag_split_ivs_in_unroller)
159238fd1498Szrj     {
159338fd1498Szrj       opt_info->insns_to_split
159438fd1498Szrj        	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
159538fd1498Szrj       opt_info->iv_to_split_head = NULL;
159638fd1498Szrj       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
159738fd1498Szrj     }
159838fd1498Szrj 
159938fd1498Szrj   /* Record the loop exit bb and loop preheader before the unrolling.  */
160038fd1498Szrj   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
160138fd1498Szrj 
160238fd1498Szrj   if (edges.length () == 1)
160338fd1498Szrj     {
160438fd1498Szrj       exit = edges[0];
160538fd1498Szrj       if (!(exit->flags & EDGE_COMPLEX))
160638fd1498Szrj 	{
160738fd1498Szrj 	  opt_info->loop_exit = split_edge (exit);
160838fd1498Szrj 	  can_apply = true;
160938fd1498Szrj 	}
161038fd1498Szrj     }
161138fd1498Szrj 
161238fd1498Szrj   if (flag_variable_expansion_in_unroller
161338fd1498Szrj       && can_apply)
161438fd1498Szrj     {
161538fd1498Szrj       opt_info->insns_with_var_to_expand
161638fd1498Szrj        	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
161738fd1498Szrj       opt_info->var_to_expand_head = NULL;
161838fd1498Szrj       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
161938fd1498Szrj     }
162038fd1498Szrj 
162138fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
162238fd1498Szrj     {
162338fd1498Szrj       bb = body[i];
162438fd1498Szrj       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
162538fd1498Szrj 	continue;
162638fd1498Szrj 
162738fd1498Szrj       FOR_BB_INSNS (bb, insn)
162838fd1498Szrj       {
162938fd1498Szrj         if (!INSN_P (insn))
163038fd1498Szrj           continue;
163138fd1498Szrj 
163238fd1498Szrj         if (opt_info->insns_to_split)
163338fd1498Szrj           ivts = analyze_iv_to_split_insn (insn);
163438fd1498Szrj 
163538fd1498Szrj         if (ivts)
163638fd1498Szrj           {
163738fd1498Szrj             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
163838fd1498Szrj 	    gcc_assert (*slot1 == NULL);
163938fd1498Szrj             *slot1 = ivts;
164038fd1498Szrj 	    *opt_info->iv_to_split_tail = ivts;
164138fd1498Szrj 	    opt_info->iv_to_split_tail = &ivts->next;
164238fd1498Szrj             continue;
164338fd1498Szrj           }
164438fd1498Szrj 
164538fd1498Szrj         if (opt_info->insns_with_var_to_expand)
164638fd1498Szrj           ves = analyze_insn_to_expand_var (loop, insn);
164738fd1498Szrj 
164838fd1498Szrj         if (ves)
164938fd1498Szrj           {
165038fd1498Szrj             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
165138fd1498Szrj 	    gcc_assert (*slot2 == NULL);
165238fd1498Szrj             *slot2 = ves;
165338fd1498Szrj 	    *opt_info->var_to_expand_tail = ves;
165438fd1498Szrj 	    opt_info->var_to_expand_tail = &ves->next;
165538fd1498Szrj           }
165638fd1498Szrj       }
165738fd1498Szrj     }
165838fd1498Szrj 
165938fd1498Szrj   edges.release ();
166038fd1498Szrj   free (body);
166138fd1498Szrj   return opt_info;
166238fd1498Szrj }
166338fd1498Szrj 
166438fd1498Szrj /* Called just before loop duplication.  Records start of duplicated area
166538fd1498Szrj    to OPT_INFO.  */
166638fd1498Szrj 
166738fd1498Szrj static void
opt_info_start_duplication(struct opt_info * opt_info)166838fd1498Szrj opt_info_start_duplication (struct opt_info *opt_info)
166938fd1498Szrj {
167038fd1498Szrj   if (opt_info)
167138fd1498Szrj     opt_info->first_new_block = last_basic_block_for_fn (cfun);
167238fd1498Szrj }
167338fd1498Szrj 
167438fd1498Szrj /* Determine the number of iterations between initialization of the base
167538fd1498Szrj    variable and the current copy (N_COPY).  N_COPIES is the total number
167638fd1498Szrj    of newly created copies.  UNROLLING is true if we are unrolling
167738fd1498Szrj    (not peeling) the loop.  */
167838fd1498Szrj 
167938fd1498Szrj static unsigned
determine_split_iv_delta(unsigned n_copy,unsigned n_copies,bool unrolling)168038fd1498Szrj determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
168138fd1498Szrj {
168238fd1498Szrj   if (unrolling)
168338fd1498Szrj     {
168438fd1498Szrj       /* If we are unrolling, initialization is done in the original loop
168538fd1498Szrj 	 body (number 0).  */
168638fd1498Szrj       return n_copy;
168738fd1498Szrj     }
168838fd1498Szrj   else
168938fd1498Szrj     {
169038fd1498Szrj       /* If we are peeling, the copy in that the initialization occurs has
169138fd1498Szrj 	 number 1.  The original loop (number 0) is the last.  */
169238fd1498Szrj       if (n_copy)
169338fd1498Szrj 	return n_copy - 1;
169438fd1498Szrj       else
169538fd1498Szrj 	return n_copies;
169638fd1498Szrj     }
169738fd1498Szrj }
169838fd1498Szrj 
169938fd1498Szrj /* Allocate basic variable for the induction variable chain.  */
170038fd1498Szrj 
170138fd1498Szrj static void
allocate_basic_variable(struct iv_to_split * ivts)170238fd1498Szrj allocate_basic_variable (struct iv_to_split *ivts)
170338fd1498Szrj {
170438fd1498Szrj   rtx expr = SET_SRC (single_set (ivts->insn));
170538fd1498Szrj 
170638fd1498Szrj   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
170738fd1498Szrj }
170838fd1498Szrj 
170938fd1498Szrj /* Insert initialization of basic variable of IVTS before INSN, taking
171038fd1498Szrj    the initial value from INSN.  */
171138fd1498Szrj 
171238fd1498Szrj static void
insert_base_initialization(struct iv_to_split * ivts,rtx_insn * insn)171338fd1498Szrj insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
171438fd1498Szrj {
171538fd1498Szrj   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
171638fd1498Szrj   rtx_insn *seq;
171738fd1498Szrj 
171838fd1498Szrj   start_sequence ();
171938fd1498Szrj   expr = force_operand (expr, ivts->base_var);
172038fd1498Szrj   if (expr != ivts->base_var)
172138fd1498Szrj     emit_move_insn (ivts->base_var, expr);
172238fd1498Szrj   seq = get_insns ();
172338fd1498Szrj   end_sequence ();
172438fd1498Szrj 
172538fd1498Szrj   emit_insn_before (seq, insn);
172638fd1498Szrj }
172738fd1498Szrj 
172838fd1498Szrj /* Replace the use of induction variable described in IVTS in INSN
172938fd1498Szrj    by base variable + DELTA * step.  */
173038fd1498Szrj 
173138fd1498Szrj static void
split_iv(struct iv_to_split * ivts,rtx_insn * insn,unsigned delta)173238fd1498Szrj split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
173338fd1498Szrj {
173438fd1498Szrj   rtx expr, *loc, incr, var;
173538fd1498Szrj   rtx_insn *seq;
173638fd1498Szrj   machine_mode mode = GET_MODE (ivts->base_var);
173738fd1498Szrj   rtx src, dest, set;
173838fd1498Szrj 
173938fd1498Szrj   /* Construct base + DELTA * step.  */
174038fd1498Szrj   if (!delta)
174138fd1498Szrj     expr = ivts->base_var;
174238fd1498Szrj   else
174338fd1498Szrj     {
174438fd1498Szrj       incr = simplify_gen_binary (MULT, mode,
174538fd1498Szrj 				  copy_rtx (ivts->step),
174638fd1498Szrj 				  gen_int_mode (delta, mode));
174738fd1498Szrj       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
174838fd1498Szrj 				  ivts->base_var, incr);
174938fd1498Szrj     }
175038fd1498Szrj 
175138fd1498Szrj   /* Figure out where to do the replacement.  */
175238fd1498Szrj   loc = &SET_SRC (single_set (insn));
175338fd1498Szrj 
175438fd1498Szrj   /* If we can make the replacement right away, we're done.  */
175538fd1498Szrj   if (validate_change (insn, loc, expr, 0))
175638fd1498Szrj     return;
175738fd1498Szrj 
175838fd1498Szrj   /* Otherwise, force EXPR into a register and try again.  */
175938fd1498Szrj   start_sequence ();
176038fd1498Szrj   var = gen_reg_rtx (mode);
176138fd1498Szrj   expr = force_operand (expr, var);
176238fd1498Szrj   if (expr != var)
176338fd1498Szrj     emit_move_insn (var, expr);
176438fd1498Szrj   seq = get_insns ();
176538fd1498Szrj   end_sequence ();
176638fd1498Szrj   emit_insn_before (seq, insn);
176738fd1498Szrj 
176838fd1498Szrj   if (validate_change (insn, loc, var, 0))
176938fd1498Szrj     return;
177038fd1498Szrj 
177138fd1498Szrj   /* The last chance.  Try recreating the assignment in insn
177238fd1498Szrj      completely from scratch.  */
177338fd1498Szrj   set = single_set (insn);
177438fd1498Szrj   gcc_assert (set);
177538fd1498Szrj 
177638fd1498Szrj   start_sequence ();
177738fd1498Szrj   *loc = var;
177838fd1498Szrj   src = copy_rtx (SET_SRC (set));
177938fd1498Szrj   dest = copy_rtx (SET_DEST (set));
178038fd1498Szrj   src = force_operand (src, dest);
178138fd1498Szrj   if (src != dest)
178238fd1498Szrj     emit_move_insn (dest, src);
178338fd1498Szrj   seq = get_insns ();
178438fd1498Szrj   end_sequence ();
178538fd1498Szrj 
178638fd1498Szrj   emit_insn_before (seq, insn);
178738fd1498Szrj   delete_insn (insn);
178838fd1498Szrj }
178938fd1498Szrj 
179038fd1498Szrj 
179138fd1498Szrj /* Return one expansion of the accumulator recorded in struct VE.  */
179238fd1498Szrj 
179338fd1498Szrj static rtx
get_expansion(struct var_to_expand * ve)179438fd1498Szrj get_expansion (struct var_to_expand *ve)
179538fd1498Szrj {
179638fd1498Szrj   rtx reg;
179738fd1498Szrj 
179838fd1498Szrj   if (ve->reuse_expansion == 0)
179938fd1498Szrj     reg = ve->reg;
180038fd1498Szrj   else
180138fd1498Szrj     reg = ve->var_expansions[ve->reuse_expansion - 1];
180238fd1498Szrj 
180338fd1498Szrj   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
180438fd1498Szrj     ve->reuse_expansion = 0;
180538fd1498Szrj   else
180638fd1498Szrj     ve->reuse_expansion++;
180738fd1498Szrj 
180838fd1498Szrj   return reg;
180938fd1498Szrj }
181038fd1498Szrj 
181138fd1498Szrj 
181238fd1498Szrj /* Given INSN replace the uses of the accumulator recorded in VE
181338fd1498Szrj    with a new register.  */
181438fd1498Szrj 
181538fd1498Szrj static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx_insn * insn)181638fd1498Szrj expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
181738fd1498Szrj {
181838fd1498Szrj   rtx new_reg, set;
181938fd1498Szrj   bool really_new_expansion = false;
182038fd1498Szrj 
182138fd1498Szrj   set = single_set (insn);
182238fd1498Szrj   gcc_assert (set);
182338fd1498Szrj 
182438fd1498Szrj   /* Generate a new register only if the expansion limit has not been
182538fd1498Szrj      reached.  Else reuse an already existing expansion.  */
182638fd1498Szrj   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
182738fd1498Szrj     {
182838fd1498Szrj       really_new_expansion = true;
182938fd1498Szrj       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
183038fd1498Szrj     }
183138fd1498Szrj   else
183238fd1498Szrj     new_reg = get_expansion (ve);
183338fd1498Szrj 
183438fd1498Szrj   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
183538fd1498Szrj   if (apply_change_group ())
183638fd1498Szrj     if (really_new_expansion)
183738fd1498Szrj       {
183838fd1498Szrj         ve->var_expansions.safe_push (new_reg);
183938fd1498Szrj         ve->expansion_count++;
184038fd1498Szrj       }
184138fd1498Szrj }
184238fd1498Szrj 
184338fd1498Szrj /* Initialize the variable expansions in loop preheader.  PLACE is the
184438fd1498Szrj    loop-preheader basic block where the initialization of the
184538fd1498Szrj    expansions should take place.  The expansions are initialized with
184638fd1498Szrj    (-0) when the operation is plus or minus to honor sign zero.  This
184738fd1498Szrj    way we can prevent cases where the sign of the final result is
184838fd1498Szrj    effected by the sign of the expansion.  Here is an example to
184938fd1498Szrj    demonstrate this:
185038fd1498Szrj 
185138fd1498Szrj    for (i = 0 ; i < n; i++)
185238fd1498Szrj      sum += something;
185338fd1498Szrj 
185438fd1498Szrj    ==>
185538fd1498Szrj 
185638fd1498Szrj    sum += something
185738fd1498Szrj    ....
185838fd1498Szrj    i = i+1;
185938fd1498Szrj    sum1 += something
186038fd1498Szrj    ....
186138fd1498Szrj    i = i+1
186238fd1498Szrj    sum2 += something;
186338fd1498Szrj    ....
186438fd1498Szrj 
186538fd1498Szrj    When SUM is initialized with -zero and SOMETHING is also -zero; the
186638fd1498Szrj    final result of sum should be -zero thus the expansions sum1 and sum2
186738fd1498Szrj    should be initialized with -zero as well (otherwise we will get +zero
186838fd1498Szrj    as the final result).  */
186938fd1498Szrj 
187038fd1498Szrj static void
insert_var_expansion_initialization(struct var_to_expand * ve,basic_block place)187138fd1498Szrj insert_var_expansion_initialization (struct var_to_expand *ve,
187238fd1498Szrj 				     basic_block place)
187338fd1498Szrj {
187438fd1498Szrj   rtx_insn *seq;
187538fd1498Szrj   rtx var, zero_init;
187638fd1498Szrj   unsigned i;
187738fd1498Szrj   machine_mode mode = GET_MODE (ve->reg);
187838fd1498Szrj   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
187938fd1498Szrj 
188038fd1498Szrj   if (ve->var_expansions.length () == 0)
188138fd1498Szrj     return;
188238fd1498Szrj 
188338fd1498Szrj   start_sequence ();
188438fd1498Szrj   switch (ve->op)
188538fd1498Szrj     {
188638fd1498Szrj     case FMA:
188738fd1498Szrj       /* Note that we only accumulate FMA via the ADD operand.  */
188838fd1498Szrj     case PLUS:
188938fd1498Szrj     case MINUS:
189038fd1498Szrj       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
189138fd1498Szrj         {
189238fd1498Szrj 	  if (honor_signed_zero_p)
189338fd1498Szrj 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
189438fd1498Szrj 	  else
189538fd1498Szrj 	    zero_init = CONST0_RTX (mode);
189638fd1498Szrj           emit_move_insn (var, zero_init);
189738fd1498Szrj         }
189838fd1498Szrj       break;
189938fd1498Szrj 
190038fd1498Szrj     case MULT:
190138fd1498Szrj       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
190238fd1498Szrj         {
190338fd1498Szrj           zero_init = CONST1_RTX (GET_MODE (var));
190438fd1498Szrj           emit_move_insn (var, zero_init);
190538fd1498Szrj         }
190638fd1498Szrj       break;
190738fd1498Szrj 
190838fd1498Szrj     default:
190938fd1498Szrj       gcc_unreachable ();
191038fd1498Szrj     }
191138fd1498Szrj 
191238fd1498Szrj   seq = get_insns ();
191338fd1498Szrj   end_sequence ();
191438fd1498Szrj 
191538fd1498Szrj   emit_insn_after (seq, BB_END (place));
191638fd1498Szrj }
191738fd1498Szrj 
191838fd1498Szrj /* Combine the variable expansions at the loop exit.  PLACE is the
191938fd1498Szrj    loop exit basic block where the summation of the expansions should
192038fd1498Szrj    take place.  */
192138fd1498Szrj 
192238fd1498Szrj static void
combine_var_copies_in_loop_exit(struct var_to_expand * ve,basic_block place)192338fd1498Szrj combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
192438fd1498Szrj {
192538fd1498Szrj   rtx sum = ve->reg;
192638fd1498Szrj   rtx expr, var;
192738fd1498Szrj   rtx_insn *seq, *insn;
192838fd1498Szrj   unsigned i;
192938fd1498Szrj 
193038fd1498Szrj   if (ve->var_expansions.length () == 0)
193138fd1498Szrj     return;
193238fd1498Szrj 
193338fd1498Szrj   /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
193438fd1498Szrj      it both here and as the destination of the assignment.  */
193538fd1498Szrj   sum = copy_rtx (sum);
193638fd1498Szrj   start_sequence ();
193738fd1498Szrj   switch (ve->op)
193838fd1498Szrj     {
193938fd1498Szrj     case FMA:
194038fd1498Szrj       /* Note that we only accumulate FMA via the ADD operand.  */
194138fd1498Szrj     case PLUS:
194238fd1498Szrj     case MINUS:
194338fd1498Szrj       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
194438fd1498Szrj 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
194538fd1498Szrj       break;
194638fd1498Szrj 
194738fd1498Szrj     case MULT:
194838fd1498Szrj       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
194938fd1498Szrj 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
195038fd1498Szrj       break;
195138fd1498Szrj 
195238fd1498Szrj     default:
195338fd1498Szrj       gcc_unreachable ();
195438fd1498Szrj     }
195538fd1498Szrj 
195638fd1498Szrj   expr = force_operand (sum, ve->reg);
195738fd1498Szrj   if (expr != ve->reg)
195838fd1498Szrj     emit_move_insn (ve->reg, expr);
195938fd1498Szrj   seq = get_insns ();
196038fd1498Szrj   end_sequence ();
196138fd1498Szrj 
196238fd1498Szrj   insn = BB_HEAD (place);
196338fd1498Szrj   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
196438fd1498Szrj     insn = NEXT_INSN (insn);
196538fd1498Szrj 
196638fd1498Szrj   emit_insn_after (seq, insn);
196738fd1498Szrj }
196838fd1498Szrj 
196938fd1498Szrj /* Strip away REG_EQUAL notes for IVs we're splitting.
197038fd1498Szrj 
197138fd1498Szrj    Updating REG_EQUAL notes for IVs we split is tricky: We
197238fd1498Szrj    cannot tell until after unrolling, DF-rescanning, and liveness
197338fd1498Szrj    updating, whether an EQ_USE is reached by the split IV while
197438fd1498Szrj    the IV reg is still live.  See PR55006.
197538fd1498Szrj 
197638fd1498Szrj    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
197738fd1498Szrj    because RTL loop-iv requires us to defer rescanning insns and
197838fd1498Szrj    any notes attached to them.  So resort to old techniques...  */
197938fd1498Szrj 
198038fd1498Szrj static void
maybe_strip_eq_note_for_split_iv(struct opt_info * opt_info,rtx_insn * insn)198138fd1498Szrj maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
198238fd1498Szrj {
198338fd1498Szrj   struct iv_to_split *ivts;
198438fd1498Szrj   rtx note = find_reg_equal_equiv_note (insn);
198538fd1498Szrj   if (! note)
198638fd1498Szrj     return;
198738fd1498Szrj   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
198838fd1498Szrj     if (reg_mentioned_p (ivts->orig_var, note))
198938fd1498Szrj       {
199038fd1498Szrj 	remove_note (insn, note);
199138fd1498Szrj 	return;
199238fd1498Szrj       }
199338fd1498Szrj }
199438fd1498Szrj 
199538fd1498Szrj /* Apply loop optimizations in loop copies using the
199638fd1498Szrj    data which gathered during the unrolling.  Structure
199738fd1498Szrj    OPT_INFO record that data.
199838fd1498Szrj 
199938fd1498Szrj    UNROLLING is true if we unrolled (not peeled) the loop.
200038fd1498Szrj    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
200138fd1498Szrj    the loop (as it should happen in complete unrolling, but not in ordinary
200238fd1498Szrj    peeling of the loop).  */
200338fd1498Szrj 
200438fd1498Szrj static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)200538fd1498Szrj apply_opt_in_copies (struct opt_info *opt_info,
200638fd1498Szrj                      unsigned n_copies, bool unrolling,
200738fd1498Szrj                      bool rewrite_original_loop)
200838fd1498Szrj {
200938fd1498Szrj   unsigned i, delta;
201038fd1498Szrj   basic_block bb, orig_bb;
201138fd1498Szrj   rtx_insn *insn, *orig_insn, *next;
201238fd1498Szrj   struct iv_to_split ivts_templ, *ivts;
201338fd1498Szrj   struct var_to_expand ve_templ, *ves;
201438fd1498Szrj 
201538fd1498Szrj   /* Sanity check -- we need to put initialization in the original loop
201638fd1498Szrj      body.  */
201738fd1498Szrj   gcc_assert (!unrolling || rewrite_original_loop);
201838fd1498Szrj 
201938fd1498Szrj   /* Allocate the basic variables (i0).  */
202038fd1498Szrj   if (opt_info->insns_to_split)
202138fd1498Szrj     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
202238fd1498Szrj       allocate_basic_variable (ivts);
202338fd1498Szrj 
202438fd1498Szrj   for (i = opt_info->first_new_block;
202538fd1498Szrj        i < (unsigned) last_basic_block_for_fn (cfun);
202638fd1498Szrj        i++)
202738fd1498Szrj     {
202838fd1498Szrj       bb = BASIC_BLOCK_FOR_FN (cfun, i);
202938fd1498Szrj       orig_bb = get_bb_original (bb);
203038fd1498Szrj 
203138fd1498Szrj       /* bb->aux holds position in copy sequence initialized by
203238fd1498Szrj 	 duplicate_loop_to_header_edge.  */
203338fd1498Szrj       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
203438fd1498Szrj 					unrolling);
203538fd1498Szrj       bb->aux = 0;
203638fd1498Szrj       orig_insn = BB_HEAD (orig_bb);
203738fd1498Szrj       FOR_BB_INSNS_SAFE (bb, insn, next)
203838fd1498Szrj         {
203938fd1498Szrj 	  if (!INSN_P (insn)
204038fd1498Szrj 	      || (DEBUG_BIND_INSN_P (insn)
204138fd1498Szrj 		  && INSN_VAR_LOCATION_DECL (insn)
204238fd1498Szrj 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
204338fd1498Szrj             continue;
204438fd1498Szrj 
204538fd1498Szrj 	  while (!INSN_P (orig_insn)
204638fd1498Szrj 		 || (DEBUG_BIND_INSN_P (orig_insn)
204738fd1498Szrj 		     && INSN_VAR_LOCATION_DECL (orig_insn)
204838fd1498Szrj 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
204938fd1498Szrj 			 == LABEL_DECL)))
205038fd1498Szrj             orig_insn = NEXT_INSN (orig_insn);
205138fd1498Szrj 
205238fd1498Szrj           ivts_templ.insn = orig_insn;
205338fd1498Szrj           ve_templ.insn = orig_insn;
205438fd1498Szrj 
205538fd1498Szrj           /* Apply splitting iv optimization.  */
205638fd1498Szrj           if (opt_info->insns_to_split)
205738fd1498Szrj             {
205838fd1498Szrj 	      maybe_strip_eq_note_for_split_iv (opt_info, insn);
205938fd1498Szrj 
206038fd1498Szrj               ivts = opt_info->insns_to_split->find (&ivts_templ);
206138fd1498Szrj 
206238fd1498Szrj               if (ivts)
206338fd1498Szrj                 {
206438fd1498Szrj 		  gcc_assert (GET_CODE (PATTERN (insn))
206538fd1498Szrj 			      == GET_CODE (PATTERN (orig_insn)));
206638fd1498Szrj 
206738fd1498Szrj                   if (!delta)
206838fd1498Szrj                     insert_base_initialization (ivts, insn);
206938fd1498Szrj                   split_iv (ivts, insn, delta);
207038fd1498Szrj                 }
207138fd1498Szrj             }
207238fd1498Szrj           /* Apply variable expansion optimization.  */
207338fd1498Szrj           if (unrolling && opt_info->insns_with_var_to_expand)
207438fd1498Szrj             {
207538fd1498Szrj               ves = (struct var_to_expand *)
207638fd1498Szrj 		opt_info->insns_with_var_to_expand->find (&ve_templ);
207738fd1498Szrj               if (ves)
207838fd1498Szrj                 {
207938fd1498Szrj 		  gcc_assert (GET_CODE (PATTERN (insn))
208038fd1498Szrj 			      == GET_CODE (PATTERN (orig_insn)));
208138fd1498Szrj                   expand_var_during_unrolling (ves, insn);
208238fd1498Szrj                 }
208338fd1498Szrj             }
208438fd1498Szrj           orig_insn = NEXT_INSN (orig_insn);
208538fd1498Szrj         }
208638fd1498Szrj     }
208738fd1498Szrj 
208838fd1498Szrj   if (!rewrite_original_loop)
208938fd1498Szrj     return;
209038fd1498Szrj 
209138fd1498Szrj   /* Initialize the variable expansions in the loop preheader
209238fd1498Szrj      and take care of combining them at the loop exit.  */
209338fd1498Szrj   if (opt_info->insns_with_var_to_expand)
209438fd1498Szrj     {
209538fd1498Szrj       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
209638fd1498Szrj 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
209738fd1498Szrj       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
209838fd1498Szrj 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
209938fd1498Szrj     }
210038fd1498Szrj 
210138fd1498Szrj   /* Rewrite also the original loop body.  Find them as originals of the blocks
210238fd1498Szrj      in the last copied iteration, i.e. those that have
210338fd1498Szrj      get_bb_copy (get_bb_original (bb)) == bb.  */
210438fd1498Szrj   for (i = opt_info->first_new_block;
210538fd1498Szrj        i < (unsigned) last_basic_block_for_fn (cfun);
210638fd1498Szrj        i++)
210738fd1498Szrj     {
210838fd1498Szrj       bb = BASIC_BLOCK_FOR_FN (cfun, i);
210938fd1498Szrj       orig_bb = get_bb_original (bb);
211038fd1498Szrj       if (get_bb_copy (orig_bb) != bb)
211138fd1498Szrj 	continue;
211238fd1498Szrj 
211338fd1498Szrj       delta = determine_split_iv_delta (0, n_copies, unrolling);
211438fd1498Szrj       for (orig_insn = BB_HEAD (orig_bb);
211538fd1498Szrj            orig_insn != NEXT_INSN (BB_END (bb));
211638fd1498Szrj            orig_insn = next)
211738fd1498Szrj         {
211838fd1498Szrj           next = NEXT_INSN (orig_insn);
211938fd1498Szrj 
212038fd1498Szrj           if (!INSN_P (orig_insn))
212138fd1498Szrj  	    continue;
212238fd1498Szrj 
212338fd1498Szrj           ivts_templ.insn = orig_insn;
212438fd1498Szrj           if (opt_info->insns_to_split)
212538fd1498Szrj             {
212638fd1498Szrj 	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
212738fd1498Szrj 
212838fd1498Szrj               ivts = (struct iv_to_split *)
212938fd1498Szrj 		opt_info->insns_to_split->find (&ivts_templ);
213038fd1498Szrj               if (ivts)
213138fd1498Szrj                 {
213238fd1498Szrj                   if (!delta)
213338fd1498Szrj                     insert_base_initialization (ivts, orig_insn);
213438fd1498Szrj                   split_iv (ivts, orig_insn, delta);
213538fd1498Szrj                   continue;
213638fd1498Szrj                 }
213738fd1498Szrj             }
213838fd1498Szrj 
213938fd1498Szrj         }
214038fd1498Szrj     }
214138fd1498Szrj }
214238fd1498Szrj 
214338fd1498Szrj /* Release OPT_INFO.  */
214438fd1498Szrj 
214538fd1498Szrj static void
free_opt_info(struct opt_info * opt_info)214638fd1498Szrj free_opt_info (struct opt_info *opt_info)
214738fd1498Szrj {
214838fd1498Szrj   delete opt_info->insns_to_split;
214938fd1498Szrj   opt_info->insns_to_split = NULL;
215038fd1498Szrj   if (opt_info->insns_with_var_to_expand)
215138fd1498Szrj     {
215238fd1498Szrj       struct var_to_expand *ves;
215338fd1498Szrj 
215438fd1498Szrj       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
215538fd1498Szrj 	ves->var_expansions.release ();
215638fd1498Szrj       delete opt_info->insns_with_var_to_expand;
215738fd1498Szrj       opt_info->insns_with_var_to_expand = NULL;
215838fd1498Szrj     }
215938fd1498Szrj   free (opt_info);
216038fd1498Szrj }
2161