/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass detects loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful RTL
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register
   pressure).  Note that if the new variable is not needed, ivopts
   will get rid of it, so this is only a potential problem when there
   are no other linear induction variables.  In that case the
   optimization possibilities it creates are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations before
       the loop) when an estimate of the number of iterations is known
       (typically from profile info).  */
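
/* As an illustration (variable names and the exact shape are made up;
   the pass works on GIMPLE, not source), a counted loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   whose latch executes 99 times conceptually becomes

     ivtmp = 100;
     do
       {
	 a[i] = 0;
	 i++;
	 ivtmp--;
       }
     while (ivtmp != 0);

   i.e. a fresh variable counts the remaining iterations down to zero
   and the exit condition is replaced by the test against 0
   (see create_canonical_iv below).  */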

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "profile.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-inline.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"

/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     the type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     in modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}

/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of the loop (i.e. computation based on an
     induction variable that starts at a known constant).  */
  int eliminated_by_peeling;

  /* Same statistics for the last iteration of the loop: it is smaller
     because instructions after the exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */
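
/* Illustrative example (made up, not taken from any testcase): in

     static const int tbl[4] = { 1, 2, 4, 8 };
     for (i = 0; i < 4; i++)
       sum += tbl[i];

   the index i is a simple induction variable with invariant base and
   step, so in every peeled copy both i and the load tbl[i] fold to
   constants.  */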

static bool
constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when the index is
     known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look to see whether there is a constant
	 array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables become constants in the peeled copies.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}

/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but
   the last iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return the results in SIZE, estimating the benefits of complete
   unrolling exiting by EXIT.
   Stop estimating once UPPER_BOUND is met and return true in that
   case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    ;
	  /* Exit conditional.  */
	  else if (exit && body[i] == exit->src
		   && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in peeled copies.\n");
	      likely_eliminated_peeled = true;
	    }
	  else if (edge_to_cancel && body[i] == edge_to_cancel->src
		   && stmt == last_stmt (edge_to_cancel->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in last copy.\n");
	      likely_eliminated_last = true;
	    }
	  /* Sets of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      size->constant_iv = true;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if ((gimple_code (stmt) == GIMPLE_COND
		    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)
		    /* We don't simplify all constant compares so make sure
		       they are not both constant already.  See PR70288.  */
		    && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			|| ! is_gimple_min_invariant (gimple_cond_rhs (stmt))))
		   || (gimple_code (stmt) == GIMPLE_SWITCH
		       && constant_after_peeling (gimple_switch_index (
						    as_a <gswitch *> (stmt)),
						  stmt, loop)
		       && ! is_gimple_min_invariant (gimple_switch_index (
						       as_a <gswitch *> (stmt)))))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      int flags = gimple_call_flags (stmt);
	      tree decl = gimple_call_fndecl (stmt);

	      if (decl && DECL_IS_BUILTIN (decl)
		  && is_inexpensive_builtin (decl))
		;
	      else if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path++;
	    }
	  else if (gimple_code (stmt) != GIMPLE_CALL
		   && gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}

/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   The loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}
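
/* Worked example with made-up numbers: for a body of 10 insns of which
   4 fold away in the peeled copies, a last iteration of 8 insns of
   which 6 fold away, and NUNROLL == 3, the estimate is
   3 * (10 - 4) + (8 - 6) = 20 insns, scaled by 2/3 to a final value
   of 13.  */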

/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit, without making
   any code potentially executed during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove that the loop iterates at most 5 times, but we do not
   know it from the induction variable.

   For now we handle only the simple case where there is an exit
   condition just before the latch block and the latch block contains
   no statements with side effects that may otherwise terminate the
   execution of the loop (such as by EH or by terminating the program
   or longjmp).

   In the general case we may want to cancel the paths leading to
   statements that loop-niter identified as having undefined effect
   in the last iteration.  The other cases are hopefully rare and
   will be cleaned up later.  */
static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop latch.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the edge other than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can handle only conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to the loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in the loop latch does nothing that may end
	 program execution without really reaching the exit.  This may
	 include non-pure/const function calls, EH statements, volatile
	 ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}

/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put __builtin_unreachable before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If the statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Remove all exits that are known to be never taken because of the loop
   bound discovered.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before the loop reaches
	 its upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, can we remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Stores loops that will be unlooped after we process the whole loop
   tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries, trashing the loop-closed SSA form as well
   as the SCEV info, which need to stay intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop-closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();
}

/* Tries to unroll LOOP completely, i.e. NITER+1 times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved the number of iterations to be a low constant.

     EXIT is an edge that will be removed in all but the last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two
     edges leaving from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we cannot eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT) maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we cannot
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peel-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is an IV variable that will become constant, we save
	     one instruction in the loop prologue that is not accounted
	     for otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We can still cancel the loopback edge of a loop that
	 does not roll; this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a pure/const call in the function, then we can still
	 optimize the unrolled loop body if it contains some other
	 interesting code than the calls and the code storing or
	 accumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store and
		  one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead, but it may also blow up the branch predictor tables.
	 Limit the number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int) n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "number of branches on hot path in the unrolled sequence"
		     " reaches --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
		       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove an outer loop
	 and confuse the bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (profile_info)
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int) loop->header->count);
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}

/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}

/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loop_completely.  */
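
/* For example (numbers illustrative only), with an estimated trip
   count of 2 the body is copied 3 times in front of the loop, so the
   expected execution finishes in the peeled copies and never enters
   the loop itself.  */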

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter,
	       HOST_WIDE_INT maxiter)
{
  HOST_WIDE_INT npeel;
  struct loop_size size;
  int peeled_size;
  sbitmap wont_exit;
  unsigned i;
  vec<edge> to_remove = vNULL;
  edge e;

  /* If the iteration bound is known and large, then we can safely eliminate
     the check in peeled copies.  */
  if (TREE_CODE (niter) != INTEGER_CST)
    exit = NULL;

  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
    return false;

  /* Peel only innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on the quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
      return false;
    }
  npeel++;

  /* Check the size of the peeled sequence.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate, possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  wont_exit = sbitmap_alloc (npeel + 1);
  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, 0);
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &to_remove,
					     DLTHE_FLAG_UPDATE_FREQ
					     | DLTHE_FLAG_COMPLETTE_PEEL))
    {
      free_original_copy_tables ();
      free (wont_exit);
      return false;
    }
  FOR_EACH_VEC_ELT (to_remove, i, e)
    {
      bool ok = remove_path (e);
      gcc_assert (ok);
    }
  free (wont_exit);
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, (int) npeel);
    }
  if (loop->any_upper_bound)
    loop->nb_iterations_upper_bound -= npeel;
  loop->nb_iterations_estimate = 0;
  /* Make sure to mark the loop cold so we do not try to peel it again.  */
  scale_loop_profile (loop, 1, 0);
  loop->header->count = 0;
  return true;
}

/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of a loop by direct
   evaluation.  Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int) maxiter);
    }

  /* Remove exits that are known to be never taken based on the loop bound.
     Needs to be called after the call to max_loop_iterations_int, which
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, maxiter);

  return modified;
}

/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  free_numbers_of_iterations_estimates (cfun);
  estimate_numbers_of_iterations ();

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}

/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  /* Look for degenerate PHI nodes with constant argument.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gphi *phi = gsi.phi ();
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
	  && gimple_phi_num_args (phi) == 1
	  && TREE_CODE (arg) == INTEGER_CST)
	{
	  replace_uses_by (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}

/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				vec<loop_p, va_heap>& father_stack,
				struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_stack,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
        (loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father) && !loop_father->aux)
	{
	  father_stack.safe_push (loop_father);
	  loop_father->aux = loop_father;
	}

      return true;
    }

  return false;
}

/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  auto_vec<loop_p, 16> father_stack;
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates (cfun);
      estimate_numbers_of_iterations ();

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_stack,
						current_loops->tree_root);
      if (changed)
	{
	  struct loop **iter;
	  unsigned i;

	  /* Be sure to skip unlooped loops while processing the
	     father_stack array.  */
	  FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
	    (*iter)->aux = NULL;
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (!(*iter)->aux)
	      *iter = NULL;
	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets
	     confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* Propagate the constants within the new basic blocks.  */
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (*iter)
	      {
		unsigned j;
		basic_block *body = get_loop_body_in_dom_order (*iter);
		for (j = 0; j < (*iter)->num_nodes; j++)
		  propagate_constants_for_unrolling (body[j]);
		free (body);
		(*iter)->aux = NULL;
	      }
	  father_stack.truncate (0);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  father_stack.release ();

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}

/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}

/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_unroll_loops_completely (flag_unroll_loops
				       || flag_peel_loops
				       || optimize >= 3, true);
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}

/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      free_numbers_of_iterations_estimates (fun);
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}