1 /* Induction variable canonicalization and loop peeling.
2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass detects the loops that iterate a constant number of times,
21    adds a canonical induction variable (step -1, tested against 0)
22    and replaces the exit test.  This enables the less powerful rtl
23    level analysis to use this information.
24 
25    This might spoil the code in some cases (by increasing register pressure).
26    Note that if the new variable is not needed, ivopts will get rid of it,
27    so it is only a potential problem when there are no other linear induction
28    variables.  In that case the optimization opportunities this creates are
29    likely to pay off.
30 
31    We also perform
32      - complete unrolling (or peeling) when the loop rolls few enough
33        times
34      - simple peeling (i.e. copying a few initial iterations prior to the
35        loop) when an estimate of the number of iterations is known
36        (typically from profile info).  */
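
/* For illustration only (a sketch of the intended effect, not literal pass
   output): a counted loop such as

     for (i = 0; i < n; i++)
       body ();

   where the iteration count is known gets an extra counter derived from
   that count, decremented by one on every iteration, and its exit test is
   replaced by a comparison of the counter against zero, which the RTL
   level analysis understands directly.  */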
37 
38 #include "config.h"
39 #include "system.h"
40 #include "coretypes.h"
41 #include "backend.h"
42 #include "tree.h"
43 #include "gimple.h"
44 #include "cfghooks.h"
45 #include "tree-pass.h"
46 #include "ssa.h"
47 #include "cgraph.h"
48 #include "gimple-pretty-print.h"
49 #include "fold-const.h"
50 #include "profile.h"
51 #include "gimple-fold.h"
52 #include "tree-eh.h"
53 #include "gimple-iterator.h"
54 #include "tree-cfg.h"
55 #include "tree-ssa-loop-manip.h"
56 #include "tree-ssa-loop-niter.h"
57 #include "tree-ssa-loop.h"
58 #include "tree-into-ssa.h"
59 #include "cfgloop.h"
60 #include "tree-chrec.h"
61 #include "tree-scalar-evolution.h"
62 #include "params.h"
63 #include "tree-inline.h"
64 #include "tree-cfgcleanup.h"
65 #include "builtins.h"
66 #include "tree-ssa-sccvn.h"
67 
68 /* Specifies types of loops that may be unrolled.  */
69 
70 enum unroll_level
71 {
72   UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
73 			   iteration.  */
74   UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
75 			   of code size.  */
76   UL_ALL		/* All suitable loops.  */
77 };
78 
79 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
80    is the exit edge whose condition is replaced.  The ssa versions of the new
81    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
82    if they are not NULL.  */
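/* A rough sketch of the result (assumed shape, not literal output): the
   condition guarding EXIT becomes a test of the new down-counting IV
   against zero, i.e. roughly "if (iv == 0)" or "if (iv != 0)" depending
   on which edge of the conditional is the exit.  */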
83 
84 void
85 create_canonical_iv (struct loop *loop, edge exit, tree niter,
86 		     tree *var_before = NULL, tree *var_after = NULL)
87 {
88   edge in;
89   tree type, var;
90   gcond *cond;
91   gimple_stmt_iterator incr_at;
92   enum tree_code cmp;
93 
94   if (dump_file && (dump_flags & TDF_DETAILS))
95     {
96       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
97       print_generic_expr (dump_file, niter, TDF_SLIM);
98       fprintf (dump_file, " iterations.\n");
99     }
100 
101   cond = as_a <gcond *> (last_stmt (exit->src));
102   in = EDGE_SUCC (exit->src, 0);
103   if (in == exit)
104     in = EDGE_SUCC (exit->src, 1);
105 
106   /* Note that we do not need to worry about overflows, since
107      type of niter is always unsigned and all comparisons are
108      just for equality/nonequality -- i.e. everything works
109      with modulo arithmetic.  */
110 
111   type = TREE_TYPE (niter);
112   niter = fold_build2 (PLUS_EXPR, type,
113 		       niter,
114 		       build_int_cst (type, 1));
115   incr_at = gsi_last_bb (in->src);
116   create_iv (niter,
117 	     build_int_cst (type, -1),
118 	     NULL_TREE, loop,
119 	     &incr_at, false, var_before, &var);
120   if (var_after)
121     *var_after = var;
122 
123   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
124   gimple_cond_set_code (cond, cmp);
125   gimple_cond_set_lhs (cond, var);
126   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
127   update_stmt (cond);
128 }
129 
130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
131 struct loop_size
132 {
133   /* Number of instructions in the loop.  */
134   int overall;
135 
136   /* Number of instructions that will likely be optimized out in
137      peeled iterations of the loop (i.e. computations based on an
138      induction variable that starts at a known constant).  */
139   int eliminated_by_peeling;
140 
141   /* Same statistics for the last iteration of the loop: it is smaller
142      because instructions after the exit are not executed.  */
143   int last_iteration;
144   int last_iteration_eliminated_by_peeling;
145 
146   /* If some IV computation will become constant.  */
147   bool constant_iv;
148 
149   /* Number of call stmts that are not a builtin and are pure or const
150      present on the hot path.  */
151   int num_pure_calls_on_hot_path;
152   /* Number of call stmts that are not a builtin and are not pure nor const
153      present on the hot path.  */
154   int num_non_pure_calls_on_hot_path;
155   /* Number of statements other than calls in the loop.  */
156   int non_call_stmts_on_hot_path;
157   /* Number of branches seen on the hot path.  */
158   int num_branches_on_hot_path;
159 };
160 
161 /* Return true if OP in STMT will be constant after peeling LOOP.  */
162 
163 static bool
164 constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
165 {
166   if (is_gimple_min_invariant (op))
167     return true;
168 
169   /* We can still fold accesses to constant arrays when index is known.  */
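  /* For instance (illustrative only): a read from a "static const" array
     indexed by an induction variable becomes a constant in each peeled
     copy once the index is known.  */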
170   if (TREE_CODE (op) != SSA_NAME)
171     {
172       tree base = op;
173 
174       /* First make fast look if we see constant array inside.  */
175       while (handled_component_p (base))
176 	base = TREE_OPERAND (base, 0);
177       if ((DECL_P (base)
178 	   && ctor_for_folding (base) != error_mark_node)
179 	  || CONSTANT_CLASS_P (base))
180 	{
181 	  /* If so, see if we understand all the indices.  */
182 	  base = op;
183 	  while (handled_component_p (base))
184 	    {
185 	      if (TREE_CODE (base) == ARRAY_REF
186 		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
187 		return false;
188 	      base = TREE_OPERAND (base, 0);
189 	    }
190 	  return true;
191 	}
192       return false;
193     }
194 
195   /* Induction variables are constants when defined in loop.  */
196   if (loop_containing_stmt (stmt) != loop)
197     return false;
198   tree ev = analyze_scalar_evolution (loop, op);
199   if (chrec_contains_undetermined (ev)
200       || chrec_contains_symbols (ev))
201     return false;
202   return true;
203 }
204 
205 /* Computes an estimated number of insns in LOOP.
206    EXIT (if non-NULL) is an exit edge that will be eliminated in all but the
207    last iteration of the loop.
208    EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
209    iteration of the loop.
210    Return results in SIZE, estimating the benefit of complete unrolling exiting by EXIT.
211    Stop estimating once UPPER_BOUND is exceeded and return true in that case.  */
212 
213 static bool
214 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
215 			 struct loop_size *size, int upper_bound)
216 {
217   basic_block *body = get_loop_body (loop);
218   gimple_stmt_iterator gsi;
219   unsigned int i;
220   bool after_exit;
221   vec<basic_block> path = get_loop_hot_path (loop);
222 
223   size->overall = 0;
224   size->eliminated_by_peeling = 0;
225   size->last_iteration = 0;
226   size->last_iteration_eliminated_by_peeling = 0;
227   size->num_pure_calls_on_hot_path = 0;
228   size->num_non_pure_calls_on_hot_path = 0;
229   size->non_call_stmts_on_hot_path = 0;
230   size->num_branches_on_hot_path = 0;
231   size->constant_iv = 0;
232 
233   if (dump_file && (dump_flags & TDF_DETAILS))
234     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
235   for (i = 0; i < loop->num_nodes; i++)
236     {
237       if (edge_to_cancel && body[i] != edge_to_cancel->src
238 	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
239 	after_exit = true;
240       else
241 	after_exit = false;
242       if (dump_file && (dump_flags & TDF_DETAILS))
243 	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
244 		 after_exit);
245 
246       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
247 	{
248 	  gimple *stmt = gsi_stmt (gsi);
249 	  int num = estimate_num_insns (stmt, &eni_size_weights);
250 	  bool likely_eliminated = false;
251 	  bool likely_eliminated_last = false;
252 	  bool likely_eliminated_peeled = false;
253 
254 	  if (dump_file && (dump_flags & TDF_DETAILS))
255 	    {
256 	      fprintf (dump_file, "  size: %3i ", num);
257 	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
258 	    }
259 
260 	  /* Look for reasons why we might optimize this stmt away. */
261 
262 	  if (!gimple_has_side_effects (stmt))
263 	    {
264 	      /* Exit conditional.  */
265 	      if (exit && body[i] == exit->src
266 		  && stmt == last_stmt (exit->src))
267 		{
268 		  if (dump_file && (dump_flags & TDF_DETAILS))
269 		    fprintf (dump_file, "   Exit condition will be eliminated "
270 			     "in peeled copies.\n");
271 		  likely_eliminated_peeled = true;
272 		}
273 	      if (edge_to_cancel && body[i] == edge_to_cancel->src
274 		  && stmt == last_stmt (edge_to_cancel->src))
275 		{
276 		  if (dump_file && (dump_flags & TDF_DETAILS))
277 		    fprintf (dump_file, "   Exit condition will be eliminated "
278 			     "in last copy.\n");
279 		  likely_eliminated_last = true;
280 		}
281 	      /* Sets of IV variables  */
282 	      if (gimple_code (stmt) == GIMPLE_ASSIGN
283 		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
284 		{
285 		  if (dump_file && (dump_flags & TDF_DETAILS))
286 		    fprintf (dump_file, "   Induction variable computation will"
287 			     " be folded away.\n");
288 		  likely_eliminated = true;
289 		}
290 	      /* Assignments of IV variables.  */
291 	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
292 		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
293 		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
294 						  stmt, loop)
295 		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
296 			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
297 						      stmt, loop)))
298 		{
299 		  size->constant_iv = true;
300 		  if (dump_file && (dump_flags & TDF_DETAILS))
301 		    fprintf (dump_file,
302 			     "   Constant expression will be folded away.\n");
303 		  likely_eliminated = true;
304 		}
305 	      /* Conditionals.  */
306 	      else if ((gimple_code (stmt) == GIMPLE_COND
307 			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
308 						   loop)
309 			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
310 						   loop)
311 			/* We don't simplify all constant compares so make sure
312 			   they are not both constant already.  See PR70288.  */
313 			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
314 			    || ! is_gimple_min_invariant
315 				 (gimple_cond_rhs (stmt))))
316 		       || (gimple_code (stmt) == GIMPLE_SWITCH
317 			   && constant_after_peeling (gimple_switch_index (
318 							as_a <gswitch *>
319 							  (stmt)),
320 						      stmt, loop)
321 			   && ! is_gimple_min_invariant
322 				   (gimple_switch_index
323 				      (as_a <gswitch *> (stmt)))))
324 		{
325 		  if (dump_file && (dump_flags & TDF_DETAILS))
326 		    fprintf (dump_file, "   Constant conditional.\n");
327 		  likely_eliminated = true;
328 		}
329 	    }
330 
331 	  size->overall += num;
332 	  if (likely_eliminated || likely_eliminated_peeled)
333 	    size->eliminated_by_peeling += num;
334 	  if (!after_exit)
335 	    {
336 	      size->last_iteration += num;
337 	      if (likely_eliminated || likely_eliminated_last)
338 		size->last_iteration_eliminated_by_peeling += num;
339 	    }
340 	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
341 	      - size->last_iteration_eliminated_by_peeling) > upper_bound)
342 	    {
343               free (body);
344 	      path.release ();
345 	      return true;
346 	    }
347 	}
348     }
349   while (path.length ())
350     {
351       basic_block bb = path.pop ();
352       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
353 	{
354 	  gimple *stmt = gsi_stmt (gsi);
355 	  if (gimple_code (stmt) == GIMPLE_CALL
356 	      && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
357 	    {
358 	      int flags = gimple_call_flags (stmt);
359 	      if (flags & (ECF_PURE | ECF_CONST))
360 		size->num_pure_calls_on_hot_path++;
361 	      else
362 		size->num_non_pure_calls_on_hot_path++;
363 	      size->num_branches_on_hot_path ++;
364 	    }
365 	  /* Count inexpensive calls as non-calls, because they will likely
366 	     expand inline.  */
367 	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
368 	    size->non_call_stmts_on_hot_path++;
369 	  if (((gimple_code (stmt) == GIMPLE_COND
370 	        && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
371 		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
372 						loop)))
373 	       || (gimple_code (stmt) == GIMPLE_SWITCH
374 		   && !constant_after_peeling (gimple_switch_index (
375 						 as_a <gswitch *> (stmt)),
376 					       stmt, loop)))
377 	      && (!exit || bb != exit->src))
378 	    size->num_branches_on_hot_path++;
379 	}
380     }
381   path.release ();
382   if (dump_file && (dump_flags & TDF_DETAILS))
383     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
384     	     size->eliminated_by_peeling, size->last_iteration,
385 	     size->last_iteration_eliminated_by_peeling);
386 
387   free (body);
388   return false;
389 }
390 
391 /* Estimate number of insns of completely unrolled loop.
392    It is (NUNROLL + 1) * size of loop body with taking into account
393    the fact that in last copy everything after exit conditional
394    is dead and that some instructions will be eliminated after
395    peeling.
396 
397    The loop body is likely to simplify further; this is difficult
398    to guess, so we just decrease the result by 1/3.  */
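/* Purely illustrative numbers (not taken from any particular loop): with a
   body of 10 insns of which 4 are expected to be eliminated, a last
   iteration of 8 insns of which 3 are eliminated, and NUNROLL == 3, the
   estimate is 3 * (10 - 4) + (8 - 3) = 23 insns, scaled down to
   23 * 2 / 3 = 15.  */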
399 
400 static unsigned HOST_WIDE_INT
401 estimated_unrolled_size (struct loop_size *size,
402 			 unsigned HOST_WIDE_INT nunroll)
403 {
404   HOST_WIDE_INT unr_insns = ((nunroll)
405   			     * (HOST_WIDE_INT) (size->overall
406 			     			- size->eliminated_by_peeling));
407   if (!nunroll)
408     unr_insns = 0;
409   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
410 
411   unr_insns = unr_insns * 2 / 3;
412   if (unr_insns <= 0)
413     unr_insns = 1;
414 
415   return unr_insns;
416 }
417 
418 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
419    body that can be removed to make the loop always exit, without at
420    the same time making any code potentially executed during the last
421    iteration dead.
422 
423    After complete unrolling we still may get rid of the conditional
424    on the exit in the last copy even if we have no idea what it does.
425    This is quite a common case for loops of the form
426 
427      int a[5];
428      for (i=0;i<b;i++)
429        a[i]=0;
430 
431    Here we prove that the loop iterates at most 5 times, but we do not
432    know it from the induction variable.
433 
434    For now we handle only the simple case where there is an exit condition
435    just before the latch block and the latch block contains no statements
436    with side effects that may otherwise terminate the execution of the loop
437    (such as by EH or by terminating the program or longjmp).
438 
439    In the general case we may want to cancel the paths leading to statements
440    that loop-niter identified as having undefined effects in the last
441    iteration.  The other cases are hopefully rare and will be cleaned up later.  */
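
/* Tying this to the example above (a sketch, not pass output): once the
   loop body has been copied five times, the copy of the exit test in the
   last iteration can never branch back to the latch, so the non-exit edge
   leaving that test is the edge this function looks for.  */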
442 
443 static edge
444 loop_edge_to_cancel (struct loop *loop)
445 {
446   vec<edge> exits;
447   unsigned i;
448   edge edge_to_cancel;
449   gimple_stmt_iterator gsi;
450 
451   /* We want only one predecessor of the loop.  */
452   if (EDGE_COUNT (loop->latch->preds) > 1)
453     return NULL;
454 
455   exits = get_loop_exit_edges (loop);
456 
457   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
458     {
459        /* Find the edge other than the loop exit
460           leaving the conditional.  */
461        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
462          continue;
463        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
464          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
465        else
466          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
467 
468       /* We only can handle conditionals.  */
469       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
470 	continue;
471 
472       /* We should never have conditionals in the loop latch. */
473       gcc_assert (edge_to_cancel->dest != loop->header);
474 
475       /* Check that it leads to loop latch.  */
476       if (edge_to_cancel->dest != loop->latch)
477         continue;
478 
479       exits.release ();
480 
481       /* Verify that the code in loop latch does nothing that may end program
482          execution without really reaching the exit.  This may include
483 	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
484       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
485 	if (gimple_has_side_effects (gsi_stmt (gsi)))
486 	   return NULL;
487       return edge_to_cancel;
488     }
489   exits.release ();
490   return NULL;
491 }
492 
493 /* Remove all tests for exits that are known to be taken after LOOP was
494    peeled NPEELED times.  Put a call to __builtin_unreachable before every
495    statement known to never be executed.  */
496 
497 static bool
498 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
499 {
500   struct nb_iter_bound *elt;
501   bool changed = false;
502 
503   for (elt = loop->bounds; elt; elt = elt->next)
504     {
505       /* If statement is known to be undefined after peeling, turn it
506 	 into unreachable (or trap when debugging experience is supposed
507 	 to be good).  */
508       if (!elt->is_exit
509 	  && wi::ltu_p (elt->bound, npeeled))
510 	{
511 	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
512 	  gcall *stmt = gimple_build_call
513 	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
514 	  gimple_set_location (stmt, gimple_location (elt->stmt));
515 	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
516 	  split_block (gimple_bb (stmt), stmt);
517 	  changed = true;
518 	  if (dump_file && (dump_flags & TDF_DETAILS))
519 	    {
520 	      fprintf (dump_file, "Forced statement unreachable: ");
521 	      print_gimple_stmt (dump_file, elt->stmt, 0);
522 	    }
523 	}
524       /* If we know the exit will be taken after peeling, update.  */
525       else if (elt->is_exit
526 	       && wi::leu_p (elt->bound, npeeled))
527 	{
528 	  basic_block bb = gimple_bb (elt->stmt);
529 	  edge exit_edge = EDGE_SUCC (bb, 0);
530 
531 	  if (dump_file && (dump_flags & TDF_DETAILS))
532 	    {
533 	      fprintf (dump_file, "Forced exit to be taken: ");
534 	      print_gimple_stmt (dump_file, elt->stmt, 0);
535 	    }
536 	  if (!loop_exit_edge_p (loop, exit_edge))
537 	    exit_edge = EDGE_SUCC (bb, 1);
538 	  exit_edge->probability = profile_probability::always ();
539 	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
540 	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
541 	  if (exit_edge->flags & EDGE_TRUE_VALUE)
542 	    gimple_cond_make_true (cond_stmt);
543 	  else
544 	    gimple_cond_make_false (cond_stmt);
545 	  update_stmt (cond_stmt);
546 	  changed = true;
547 	}
548     }
549   return changed;
550 }
551 
552 /* Remove all exits that are known to be never taken because of the loop bound
553    discovered.  */
554 
555 static bool
556 remove_redundant_iv_tests (struct loop *loop)
557 {
558   struct nb_iter_bound *elt;
559   bool changed = false;
560 
561   if (!loop->any_upper_bound)
562     return false;
563   for (elt = loop->bounds; elt; elt = elt->next)
564     {
565       /* The exit is pointless if it won't be taken before the loop reaches
566 	 its upper bound.  */
567       if (elt->is_exit && loop->any_upper_bound
568           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
569 	{
570 	  basic_block bb = gimple_bb (elt->stmt);
571 	  edge exit_edge = EDGE_SUCC (bb, 0);
572 	  struct tree_niter_desc niter;
573 
574 	  if (!loop_exit_edge_p (loop, exit_edge))
575 	    exit_edge = EDGE_SUCC (bb, 1);
576 
577 	  /* Only when we know the actual number of iterations, not
578 	     just a bound, can we remove the exit.  */
579 	  if (!number_of_iterations_exit (loop, exit_edge,
580 					  &niter, false, false)
581 	      || !integer_onep (niter.assumptions)
582 	      || !integer_zerop (niter.may_be_zero)
583 	      || !niter.niter
584 	      || TREE_CODE (niter.niter) != INTEGER_CST
585 	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
586 			     wi::to_widest (niter.niter)))
587 	    continue;
588 
589 	  if (dump_file && (dump_flags & TDF_DETAILS))
590 	    {
591 	      fprintf (dump_file, "Removed pointless exit: ");
592 	      print_gimple_stmt (dump_file, elt->stmt, 0);
593 	    }
594 	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
595 	  if (exit_edge->flags & EDGE_TRUE_VALUE)
596 	    gimple_cond_make_false (cond_stmt);
597 	  else
598 	    gimple_cond_make_true (cond_stmt);
599 	  update_stmt (cond_stmt);
600 	  changed = true;
601 	}
602     }
603   return changed;
604 }
605 
606 /* Stores loops that will be unlooped and edges that will be removed
607    after we process the whole loop tree.  */
608 static vec<loop_p> loops_to_unloop;
609 static vec<int> loops_to_unloop_nunroll;
610 static vec<edge> edges_to_remove;
611 /* Stores loops that have been peeled.  */
612 static bitmap peeled_loops;
613 
614 /* Cancel all fully unrolled loops by putting __builtin_unreachable
615    on the latch edge.
616    We do it after all unrolling since unlooping moves basic blocks
617    across loop boundaries, trashing loop-closed SSA form as well
618    as SCEV info that needs to stay intact during unrolling.
619 
620    IRRED_INVALIDATED is used to track whether information about
621    irreducible regions may become invalid as a result
622    of the transformation.
623    LOOP_CLOSED_SSA_INVALIDATED is used to track the case
624    when we need to go into loop-closed SSA form.  */
625 
626 static void
627 unloop_loops (bitmap loop_closed_ssa_invalidated,
628 	      bool *irred_invalidated)
629 {
630   while (loops_to_unloop.length ())
631     {
632       struct loop *loop = loops_to_unloop.pop ();
633       int n_unroll = loops_to_unloop_nunroll.pop ();
634       basic_block latch = loop->latch;
635       edge latch_edge = loop_latch_edge (loop);
636       int flags = latch_edge->flags;
637       location_t locus = latch_edge->goto_locus;
638       gcall *stmt;
639       gimple_stmt_iterator gsi;
640 
641       remove_exits_and_undefined_stmts (loop, n_unroll);
642 
643       /* Unloop destroys the latch edge.  */
644       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
645 
646       /* Create new basic block for the latch edge destination and wire
647 	 it in.  */
648       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
649       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
650       latch_edge->probability = profile_probability::never ();
651       latch_edge->flags |= flags;
652       latch_edge->goto_locus = locus;
653 
654       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
655       latch_edge->dest->count = profile_count::zero ();
656       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
657 
658       gsi = gsi_start_bb (latch_edge->dest);
659       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
660     }
661   loops_to_unloop.release ();
662   loops_to_unloop_nunroll.release ();
663 
664   /* Remove edges in peeled copies.  Since remove_path removes dominated
665      regions, we need to cope with the removal of already removed paths.  */
666   unsigned i;
667   edge e;
668   auto_vec<int, 20> src_bbs;
669   src_bbs.reserve_exact (edges_to_remove.length ());
670   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
671     src_bbs.quick_push (e->src->index);
672   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
673     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
674       {
675 	bool ok = remove_path (e, irred_invalidated,
676 			       loop_closed_ssa_invalidated);
677 	gcc_assert (ok);
678       }
679   edges_to_remove.release ();
680 }
681 
682 /* Tries to unroll LOOP completely, i.e. NITER times.
683    UL determines which loops we are allowed to unroll.
684    EXIT is the exit of the loop that should be eliminated.
685    MAXITER specifies a bound on the number of iterations, -1 if it is
686    not known or too large for HOST_WIDE_INT.  The location
687    LOCUS corresponding to the loop is used when emitting
688    a summary of the unroll to the dump file.  */
689 
690 static bool
691 try_unroll_loop_completely (struct loop *loop,
692 			    edge exit, tree niter, bool may_be_zero,
693 			    enum unroll_level ul,
694 			    HOST_WIDE_INT maxiter,
695 			    dump_user_location_t locus, bool allow_peel)
696 {
697   unsigned HOST_WIDE_INT n_unroll = 0;
698   bool n_unroll_found = false;
699   edge edge_to_cancel = NULL;
700 
701   /* See if we proved the number of iterations to be a low constant.
702 
703      EXIT is an edge that will be removed in all but last iteration of
704      the loop.
705 
706      EDGE_TO_CANCEL is an edge that will be removed from the last iteration
707      of the unrolled sequence and is expected to make the final loop not
708      roll.
709 
710      If the number of executions of the loop is determined by a standard
711      induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
712      leaving the IV test.  */
713   if (tree_fits_uhwi_p (niter))
714     {
715       n_unroll = tree_to_uhwi (niter);
716       n_unroll_found = true;
717       edge_to_cancel = EDGE_SUCC (exit->src, 0);
718       if (edge_to_cancel == exit)
719 	edge_to_cancel = EDGE_SUCC (exit->src, 1);
720     }
721   /* We do not know the number of iterations and thus we cannot eliminate
722      the EXIT edge.  */
723   else
724     exit = NULL;
725 
726   /* See if we can improve our estimate by using recorded loop bounds.  */
727   if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
728       && maxiter >= 0
729       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
730     {
731       n_unroll = maxiter;
732       n_unroll_found = true;
733       /* Loop terminates before the IV variable test, so we cannot
734 	 remove it in the last iteration.  */
735       edge_to_cancel = NULL;
736     }
737 
738   if (!n_unroll_found)
739     return false;
740 
741   if (!loop->unroll
742       && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
743     {
744       if (dump_file && (dump_flags & TDF_DETAILS))
745 	fprintf (dump_file, "Not unrolling loop %d "
746 		 "(--param max-completely-peel-times limit reached).\n",
747 		 loop->num);
748       return false;
749     }
750 
751   if (!edge_to_cancel)
752     edge_to_cancel = loop_edge_to_cancel (loop);
753 
754   if (n_unroll)
755     {
756       if (ul == UL_SINGLE_ITER)
757 	return false;
758 
759       if (loop->unroll)
760 	{
761 	  /* If the unrolling factor is too large, bail out.  */
762 	  if (n_unroll > (unsigned)loop->unroll)
763 	    {
764 	      if (dump_file && (dump_flags & TDF_DETAILS))
765 		fprintf (dump_file,
766 			 "Not unrolling loop %d: "
767 			 "user didn't want it unrolled completely.\n",
768 			 loop->num);
769 	      return false;
770 	    }
771 	}
772       else
773 	{
774 	  struct loop_size size;
775 	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
776 	     iterations.  */
777 	  bool remove_exit = (exit && niter
778 			      && TREE_CODE (niter) == INTEGER_CST
779 			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
780 	  bool large
781 	    = tree_estimate_loop_size
782 		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
783 		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
784 	  if (large)
785 	    {
786 	      if (dump_file && (dump_flags & TDF_DETAILS))
787 		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
788 			 loop->num);
789 	      return false;
790 	    }
791 
792 	  unsigned HOST_WIDE_INT ninsns = size.overall;
793 	  unsigned HOST_WIDE_INT unr_insns
794 	    = estimated_unrolled_size (&size, n_unroll);
795 	  if (dump_file && (dump_flags & TDF_DETAILS))
796 	    {
797 	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
798 	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
799 		       (int) unr_insns);
800 	    }
801 
802 	  /* If the code is going to shrink, we don't need to be extra
803 	     cautious on guessing if the unrolling is going to be
804 	     profitable.  */
805 	  if (unr_insns
806 	      /* If there is IV variable that will become constant, we
807 		 save one instruction in the loop prologue we do not
808 		 account otherwise.  */
809 	      <= ninsns + (size.constant_iv != false))
810 	    ;
811 	  /* We unroll only inner loops, because we do not consider it
812 	     profitable otherwise.  We can still cancel the loopback edge
813 	     of a loop that does not roll; this is always a good idea.  */
814 	  else if (ul == UL_NO_GROWTH)
815 	    {
816 	      if (dump_file && (dump_flags & TDF_DETAILS))
817 		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
818 			 loop->num);
819 	      return false;
820 	    }
821 	  /* Outer loops tend to be less interesting candidates for
822 	     complete unrolling unless we can do a lot of propagation
823 	     into the inner loop body.  For now we disable outer loop
824 	     unrolling when the code would grow.  */
825 	  else if (loop->inner)
826 	    {
827 	      if (dump_file && (dump_flags & TDF_DETAILS))
828 		fprintf (dump_file, "Not unrolling loop %d: "
829 			 "it is not innermost and code would grow.\n",
830 			 loop->num);
831 	      return false;
832 	    }
833 	  /* If there is a call on a hot path through the loop, then
834 	     there is most probably not much to optimize.  */
835 	  else if (size.num_non_pure_calls_on_hot_path)
836 	    {
837 	      if (dump_file && (dump_flags & TDF_DETAILS))
838 		fprintf (dump_file, "Not unrolling loop %d: "
839 			 "contains call and code would grow.\n",
840 			 loop->num);
841 	      return false;
842 	    }
843 	  /* If there is a pure/const call in the function, then we can
844 	     still optimize the unrolled loop body if it contains some
845 	     other interesting code than the calls and the code storing or
846 	     accumulating the return value.  */
847 	  else if (size.num_pure_calls_on_hot_path
848 		   /* One IV increment, one test, one ivtmp store and
849 		      one useful stmt.  That is about the minimal loop
850 		      that does a pure call.  */
851 		   && (size.non_call_stmts_on_hot_path
852 		       <= 3 + size.num_pure_calls_on_hot_path))
853 	    {
854 	      if (dump_file && (dump_flags & TDF_DETAILS))
855 		fprintf (dump_file, "Not unrolling loop %d: "
856 			 "contains just pure calls and code would grow.\n",
857 			 loop->num);
858 	      return false;
859 	    }
860 	  /* Complete unrolling is a major win when control flow is
861 	     removed and one big basic block is created.  If the loop
862 	     contains control flow, the optimization may still be a win
863 	     because of eliminating the loop overhead, but it also may
864 	     blow up the branch predictor tables.  Limit the number of
865 	     branches on the hot path through the peeled sequence.  */
866 	  else if (size.num_branches_on_hot_path * (int)n_unroll
867 		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
868 	    {
869 	      if (dump_file && (dump_flags & TDF_DETAILS))
870 		fprintf (dump_file, "Not unrolling loop %d: "
871 			 "number of branches on hot path in the unrolled "
872 			 "sequence reaches --param max-peel-branches limit.\n",
873 			 loop->num);
874 	      return false;
875 	    }
876 	  else if (unr_insns
877 		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
878 	    {
879 	      if (dump_file && (dump_flags & TDF_DETAILS))
880 		fprintf (dump_file, "Not unrolling loop %d: "
881 			 "number of insns in the unrolled sequence reaches "
882 			 "--param max-completely-peeled-insns limit.\n",
883 			 loop->num);
884 	      return false;
885 	    }
886 	}
887 
888       initialize_original_copy_tables ();
889       auto_sbitmap wont_exit (n_unroll + 1);
890       if (exit && niter
891 	  && TREE_CODE (niter) == INTEGER_CST
892 	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
893 	{
894 	  bitmap_ones (wont_exit);
895 	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
896 	      || edge_to_cancel)
897 	    bitmap_clear_bit (wont_exit, 0);
898 	}
899       else
900 	{
901 	  exit = NULL;
902 	  bitmap_clear (wont_exit);
903 	}
904       if (may_be_zero)
905 	bitmap_clear_bit (wont_exit, 1);
906 
907       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
908 						 n_unroll, wont_exit,
909 						 exit, &edges_to_remove,
910 						 DLTHE_FLAG_UPDATE_FREQ
911 						 | DLTHE_FLAG_COMPLETTE_PEEL))
912 	{
913           free_original_copy_tables ();
914 	  if (dump_file && (dump_flags & TDF_DETAILS))
915 	    fprintf (dump_file, "Failed to duplicate the loop\n");
916 	  return false;
917 	}
918 
919       free_original_copy_tables ();
920     }
921 
922   /* Remove the conditional from the last copy of the loop.  */
923   if (edge_to_cancel)
924     {
925       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
926       force_edge_cold (edge_to_cancel, true);
927       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
928 	gimple_cond_make_false (cond);
929       else
930 	gimple_cond_make_true (cond);
931       update_stmt (cond);
932       /* Do not remove the path, as doing so may remove outer loop and
933 	 confuse bookkeeping code in tree_unroll_loops_completely.  */
934     }
935 
936   /* Store the loop for later unlooping and exit removal.  */
937   loops_to_unloop.safe_push (loop);
938   loops_to_unloop_nunroll.safe_push (n_unroll);
939 
940   if (dump_enabled_p ())
941     {
942       if (!n_unroll)
943         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
944                          "loop turned into non-loop; it never loops\n");
945       else
946         {
947           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
948                            "loop with %d iterations completely unrolled",
949 			   (int) n_unroll);
950           if (loop->header->count.initialized_p ())
951             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
952                          " (header execution count %d)",
953                          (int)loop->header->count.to_gcov_type ());
954           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
955         }
956     }
957 
958   if (dump_file && (dump_flags & TDF_DETAILS))
959     {
960       if (exit)
961         fprintf (dump_file, "Exit condition of peeled iterations was "
962 		 "eliminated.\n");
963       if (edge_to_cancel)
964         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
965       else
966         fprintf (dump_file, "Latch of last iteration was marked by "
967 		 "__builtin_unreachable ().\n");
968     }
969 
970   return true;
971 }
972 
973 /* Return number of instructions after peeling.  */
974 static unsigned HOST_WIDE_INT
975 estimated_peeled_sequence_size (struct loop_size *size,
976 			        unsigned HOST_WIDE_INT npeel)
977 {
978   return MAX (npeel * (HOST_WIDE_INT) (size->overall
979 			     	       - size->eliminated_by_peeling), 1);
980 }
981 
982 /* If the loop is expected to iterate N times and is
983    small enough, duplicate the loop body N+1 times before
984    the loop itself.  This way the hot path will never
985    enter the loop.
986    Parameters are the same as for try_unroll_loops_completely.  */
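
/* For example (illustrative numbers only): if the profile says the loop
   typically iterates twice, three copies of the body are placed in front
   of the loop, so the common case never enters the loop itself.  */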
987 
988 static bool
989 try_peel_loop (struct loop *loop,
990 	       edge exit, tree niter, bool may_be_zero,
991 	       HOST_WIDE_INT maxiter)
992 {
993   HOST_WIDE_INT npeel;
994   struct loop_size size;
995   int peeled_size;
996 
997   if (!flag_peel_loops
998       || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
999       || !peeled_loops)
1000     return false;
1001 
1002   if (bitmap_bit_p (peeled_loops, loop->num))
1003     {
1004       if (dump_file)
1005         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1006       return false;
1007     }
1008 
1009   /* We don't peel loops that will be unrolled as this can duplicate a
1010      loop more times than the user requested.  */
1011   if (loop->unroll)
1012     {
1013       if (dump_file)
1014         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1015       return false;
1016     }
1017 
1018   /* Peel only innermost loops.
1019      While the code is perfectly capable of peeling non-innermost loops,
1020      the heuristics would probably need some improvements. */
1021   if (loop->inner)
1022     {
1023       if (dump_file)
1024 	fprintf (dump_file, "Not peeling: outer loop\n");
1025       return false;
1026     }
1027 
1028   if (!optimize_loop_for_speed_p (loop))
1029     {
1030       if (dump_file)
1031 	fprintf (dump_file, "Not peeling: cold loop\n");
1032       return false;
1033     }
1034 
1035   /* Check if there is an estimate on the number of iterations.  */
1036   npeel = estimated_loop_iterations_int (loop);
1037   if (npeel < 0)
1038     npeel = likely_max_loop_iterations_int (loop);
1039   if (npeel < 0)
1040     {
1041       if (dump_file)
1042         fprintf (dump_file, "Not peeling: number of iterations is not "
1043 	         "estimated\n");
1044       return false;
1045     }
1046   if (maxiter >= 0 && maxiter <= npeel)
1047     {
1048       if (dump_file)
1049 	fprintf (dump_file, "Not peeling: upper bound is known so can "
1050 		 "unroll completely\n");
1051       return false;
1052     }
1053 
1054   /* We want to peel the estimated number of iterations + 1 (so we never
1055      enter the loop on the quick path).  Check against PARAM_MAX_PEEL_TIMES
1056      and be sure to avoid overflows.  */
1057   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1058     {
1059       if (dump_file)
1060 	fprintf (dump_file, "Not peeling: rolls too much "
1061 		 "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1062       return false;
1063     }
1064   npeel++;
1065 
1066   /* Check peeled loops size.  */
1067   tree_estimate_loop_size (loop, exit, NULL, &size,
1068 			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
1069   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1070       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1071     {
1072       if (dump_file)
1073 	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1074 		 "(%i insns > --param max-peel-insns)", peeled_size);
1075       return false;
1076     }
1077 
1078   /* Duplicate possibly eliminating the exits.  */
1079   initialize_original_copy_tables ();
1080   auto_sbitmap wont_exit (npeel + 1);
1081   if (exit && niter
1082       && TREE_CODE (niter) == INTEGER_CST
1083       && wi::leu_p (npeel, wi::to_widest (niter)))
1084     {
1085       bitmap_ones (wont_exit);
1086       bitmap_clear_bit (wont_exit, 0);
1087     }
1088   else
1089     {
1090       exit = NULL;
1091       bitmap_clear (wont_exit);
1092     }
1093   if (may_be_zero)
1094     bitmap_clear_bit (wont_exit, 1);
1095   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1096 					     npeel, wont_exit,
1097 					     exit, &edges_to_remove,
1098 					     DLTHE_FLAG_UPDATE_FREQ))
1099     {
1100       free_original_copy_tables ();
1101       return false;
1102     }
1103   free_original_copy_tables ();
1104   if (dump_file && (dump_flags & TDF_DETAILS))
1105     {
1106       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1107 	       loop->num, (int) npeel);
1108     }
1109   if (loop->any_estimate)
1110     {
1111       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1112         loop->nb_iterations_estimate -= npeel;
1113       else
1114 	loop->nb_iterations_estimate = 0;
1115     }
1116   if (loop->any_upper_bound)
1117     {
1118       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1119         loop->nb_iterations_upper_bound -= npeel;
1120       else
1121         loop->nb_iterations_upper_bound = 0;
1122     }
1123   if (loop->any_likely_upper_bound)
1124     {
1125       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1126 	loop->nb_iterations_likely_upper_bound -= npeel;
1127       else
1128 	{
1129 	  loop->any_estimate = true;
1130 	  loop->nb_iterations_estimate = 0;
1131 	  loop->nb_iterations_likely_upper_bound = 0;
1132 	}
1133     }
1134   profile_count entry_count = profile_count::zero ();
1135 
1136   edge e;
1137   edge_iterator ei;
1138   FOR_EACH_EDGE (e, ei, loop->header->preds)
1139     if (e->src != loop->latch)
1140       {
1141 	if (e->src->count.initialized_p ())
1142 	  entry_count += e->src->count;
1143 	gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1144       }
1145   profile_probability p;
1146   p = entry_count.probability_in (loop->header->count);
1147   scale_loop_profile (loop, p, 0);
1148   bitmap_set_bit (peeled_loops, loop->num);
1149   return true;
1150 }
1151 /* Adds a canonical induction variable to LOOP if suitable.
1152    CREATE_IV is true if we may create a new iv.  UL determines
1153    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1154    to determine the number of iterations of a loop by direct evaluation.
1155    Returns true if cfg is changed.   */
1156 
1157 static bool
1158 canonicalize_loop_induction_variables (struct loop *loop,
1159 				       bool create_iv, enum unroll_level ul,
1160 				       bool try_eval, bool allow_peel)
1161 {
1162   edge exit = NULL;
1163   tree niter;
1164   HOST_WIDE_INT maxiter;
1165   bool modified = false;
1166   dump_user_location_t locus;
1167   struct tree_niter_desc niter_desc;
1168   bool may_be_zero = false;
1169 
1170   /* For unrolling allow conditional constant or zero iterations, thus
1171      perform loop-header copying on-the-fly.  */
1172   exit = single_exit (loop);
1173   niter = chrec_dont_know;
1174   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1175     {
1176       niter = niter_desc.niter;
1177       may_be_zero
1178 	= niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1179     }
1180   if (TREE_CODE (niter) == INTEGER_CST)
1181     locus = last_stmt (exit->src);
1182   else
1183     {
1184       /* For non-constant niter fold may_be_zero into niter again.  */
1185       if (may_be_zero)
1186 	{
1187 	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1188 	    niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1189 				 niter_desc.may_be_zero,
1190 				 build_int_cst (TREE_TYPE (niter), 0), niter);
1191 	  else
1192 	    niter = chrec_dont_know;
1193 	  may_be_zero = false;
1194 	}
1195 
1196       /* If the loop has more than one exit, try checking all of them
1197 	 for # of iterations determinable through scev.  */
1198       if (!exit)
1199 	niter = find_loop_niter (loop, &exit);
1200 
1201       /* Finally if everything else fails, try brute force evaluation.  */
1202       if (try_eval
1203 	  && (chrec_contains_undetermined (niter)
1204 	      || TREE_CODE (niter) != INTEGER_CST))
1205 	niter = find_loop_niter_by_eval (loop, &exit);
1206 
1207       if (exit)
1208         locus = last_stmt (exit->src);
1209 
1210       if (TREE_CODE (niter) != INTEGER_CST)
1211 	exit = NULL;
1212     }
1213 
1214   /* We work exceptionally hard here to estimate the bound
1215      by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
1216   if (niter && TREE_CODE (niter) == INTEGER_CST)
1217     {
1218       record_niter_bound (loop, wi::to_widest (niter),
1219 			  exit == single_likely_exit (loop), true);
1220     }
1221 
1222   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1223   maxiter = max_loop_iterations_int (loop);
1224 
1225   if (dump_file && (dump_flags & TDF_DETAILS)
1226       && TREE_CODE (niter) == INTEGER_CST)
1227     {
1228       fprintf (dump_file, "Loop %d iterates ", loop->num);
1229       print_generic_expr (dump_file, niter, TDF_SLIM);
1230       fprintf (dump_file, " times.\n");
1231     }
1232   if (dump_file && (dump_flags & TDF_DETAILS)
1233       && maxiter >= 0)
1234     {
1235       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1236 	       (int)maxiter);
1237     }
1238   if (dump_file && (dump_flags & TDF_DETAILS)
1239       && likely_max_loop_iterations_int (loop) >= 0)
1240     {
1241       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1242 	       loop->num, (int)likely_max_loop_iterations_int (loop));
1243     }
1244 
1245   /* Remove exits that are known to be never taken based on the loop bound.
1246      Needs to be done after the computation of max_loop_iterations_int,
1247      which populates the loop bounds.  */
1248   modified |= remove_redundant_iv_tests (loop);
1249 
1250   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1251 				  maxiter, locus, allow_peel))
1252     return true;
1253 
1254   if (create_iv
1255       && niter && !chrec_contains_undetermined (niter)
1256       && exit && just_once_each_iteration_p (loop, exit->src))
1257     {
1258       tree iv_niter = niter;
1259       if (may_be_zero)
1260 	{
1261 	  if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1262 	    iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1263 				    niter_desc.may_be_zero,
1264 				    build_int_cst (TREE_TYPE (iv_niter), 0),
1265 				    iv_niter);
1266 	  else
1267 	    iv_niter = NULL_TREE;
1268 	}
1269       if (iv_niter)
1270 	create_canonical_iv (loop, exit, iv_niter);
1271     }
1272 
1273   if (ul == UL_ALL)
1274     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1275 
1276   return modified;
1277 }
1278 
1279 /* The main entry point of the pass.  Adds canonical induction variables
1280    to the suitable loops.  */
1281 
1282 unsigned int
1283 canonicalize_induction_variables (void)
1284 {
1285   struct loop *loop;
1286   bool changed = false;
1287   bool irred_invalidated = false;
1288   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1289 
1290   estimate_numbers_of_iterations (cfun);
1291 
1292   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1293     {
1294       changed |= canonicalize_loop_induction_variables (loop,
1295 							true, UL_SINGLE_ITER,
1296 							true, false);
1297     }
1298   gcc_assert (!need_ssa_update_p (cfun));
1299 
1300   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1301   if (irred_invalidated
1302       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1303     mark_irreducible_loops ();
1304 
1305   /* Clean up the information about numbers of iterations, since brute force
1306      evaluation could reveal new information.  */
1307   free_numbers_of_iterations_estimates (cfun);
1308   scev_reset ();
1309 
1310   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1311     {
1312       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1313       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1314     }
1315   BITMAP_FREE (loop_closed_ssa_invalidated);
1316 
1317   if (changed)
1318     return TODO_cleanup_cfg;
1319   return 0;
1320 }
1321 
1322 /* Process loops from innermost to outer, stopping at the innermost
1323    loop we unrolled.  */
1324 
1325 static bool
1326 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1327 				bitmap father_bbs, struct loop *loop)
1328 {
1329   struct loop *loop_father;
1330   bool changed = false;
1331   struct loop *inner;
1332   enum unroll_level ul;
1333   unsigned num = number_of_loops (cfun);
1334 
1335   /* Process inner loops first.  Don't walk loops added by the recursive
1336      calls because SSA form is not up-to-date.  They can be handled in the
1337      next iteration.  */
1338   bitmap child_father_bbs = NULL;
1339   for (inner = loop->inner; inner != NULL; inner = inner->next)
1340     if ((unsigned) inner->num < num)
1341       {
1342 	if (!child_father_bbs)
1343 	  child_father_bbs = BITMAP_ALLOC (NULL);
1344 	if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1345 					    child_father_bbs, inner))
1346 	  {
1347 	    bitmap_ior_into (father_bbs, child_father_bbs);
1348 	    bitmap_clear (child_father_bbs);
1349 	    changed = true;
1350 	  }
1351       }
1352   if (child_father_bbs)
1353     BITMAP_FREE (child_father_bbs);
1354 
1355   /* If we changed an inner loop we cannot process outer loops in this
1356      iteration because SSA form is not up-to-date.  Continue with
1357      siblings of outer loops instead.  */
1358   if (changed)
1359     {
1360       /* If we are recorded as a father, clear all other fathers that
1361          are necessarily covered already to avoid redundant work.  */
1362       if (bitmap_bit_p (father_bbs, loop->header->index))
1363 	{
1364 	  bitmap_clear (father_bbs);
1365 	  bitmap_set_bit (father_bbs, loop->header->index);
1366 	}
1367       return true;
1368     }
1369 
1370   /* Don't unroll #pragma omp simd loops until the vectorizer
1371      attempts to vectorize those.  */
1372   if (loop->force_vectorize)
1373     return false;
1374 
1375   /* Try to unroll this loop.  */
1376   loop_father = loop_outer (loop);
1377   if (!loop_father)
1378     return false;
1379 
1380   if (loop->unroll > 1)
1381     ul = UL_ALL;
1382   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1383       /* Unroll outermost loops only if asked to do so or they do
1384 	 not cause code growth.  */
1385       && (unroll_outer || loop_outer (loop_father)))
1386     ul = UL_ALL;
1387   else
1388     ul = UL_NO_GROWTH;
1389 
1390   if (canonicalize_loop_induction_variables
1391         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1392     {
1393       /* If we'll continue unrolling, we need to propagate constants
1394 	 within the new basic blocks to fold away induction variable
1395 	 computations; otherwise, the size might blow up before the
1396 	 iteration is complete and the IR eventually cleaned up.  */
1397       if (loop_outer (loop_father))
1398 	{
1399 	  /* Once we process our father we will have processed
1400 	     the fathers of our children as well, so avoid doing
1401 	     redundant work and clear the fathers we have gathered so far.  */
1402 	  bitmap_clear (father_bbs);
1403 	  bitmap_set_bit (father_bbs, loop_father->header->index);
1404 	}
1405 
1406       return true;
1407     }
1408 
1409   return false;
1410 }
1411 
1412 /* Unroll LOOPS completely if they iterate just a few times.  Unless
1413    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1414    size of the code does not increase.  */
1415 
1416 static unsigned int
1417 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1418 {
1419   bitmap father_bbs = BITMAP_ALLOC (NULL);
1420   bool changed;
1421   int iteration = 0;
1422   bool irred_invalidated = false;
1423 
1424   estimate_numbers_of_iterations (cfun);
1425 
1426   do
1427     {
1428       changed = false;
1429       bitmap loop_closed_ssa_invalidated = NULL;
1430 
1431       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1432 	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1433 
1434       free_numbers_of_iterations_estimates (cfun);
1435       estimate_numbers_of_iterations (cfun);
1436 
1437       changed = tree_unroll_loops_completely_1 (may_increase_size,
1438 						unroll_outer, father_bbs,
1439 						current_loops->tree_root);
1440       if (changed)
1441 	{
1442 	  unsigned i;
1443 
1444           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1445 
1446 	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1447 	  if (loop_closed_ssa_invalidated
1448 	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
1449             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1450 					  TODO_update_ssa);
1451 	  else
1452 	    update_ssa (TODO_update_ssa);
1453 
1454 	  /* father_bbs is a bitmap of loop father header BB indices.
1455 	     Translate that to what non-root loops these BBs belong to now.  */
1456 	  bitmap_iterator bi;
1457 	  bitmap fathers = BITMAP_ALLOC (NULL);
1458 	  EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1459 	    {
1460 	      basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1461 	      if (! unrolled_loop_bb)
1462 		continue;
1463 	      if (loop_outer (unrolled_loop_bb->loop_father))
1464 		bitmap_set_bit (fathers,
1465 				unrolled_loop_bb->loop_father->num);
1466 	    }
1467 	  bitmap_clear (father_bbs);
1468 	  /* Propagate the constants within the new basic blocks.  */
1469 	  EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1470 	    {
1471 	      loop_p father = get_loop (cfun, i);
1472 	      bitmap exit_bbs = BITMAP_ALLOC (NULL);
1473 	      loop_exit *exit = father->exits->next;
1474 	      while (exit->e)
1475 		{
1476 		  bitmap_set_bit (exit_bbs, exit->e->dest->index);
1477 		  exit = exit->next;
1478 		}
1479 	      do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1480 	    }
1481 	  BITMAP_FREE (fathers);
1482 
1483 	  /* This will take care of removing completely unrolled loops
1484 	     from the loop structures so we can continue unrolling now
1485 	     innermost loops.  */
1486 	  if (cleanup_tree_cfg ())
1487 	    update_ssa (TODO_update_ssa_only_virtuals);
1488 
1489 	  /* Clean up the information about numbers of iterations, since
1490 	     complete unrolling might have invalidated it.  */
1491 	  scev_reset ();
1492 	  if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1493 	    verify_loop_closed_ssa (true);
1494 	}
1495       if (loop_closed_ssa_invalidated)
1496         BITMAP_FREE (loop_closed_ssa_invalidated);
1497     }
1498   while (changed
1499 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
1500 
1501   BITMAP_FREE (father_bbs);
1502 
1503   if (irred_invalidated
1504       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1505     mark_irreducible_loops ();
1506 
1507   return 0;
1508 }
1509 
1510 /* Canonical induction variable creation pass.  */
1511 
1512 namespace {
1513 
1514 const pass_data pass_data_iv_canon =
1515 {
1516   GIMPLE_PASS, /* type */
1517   "ivcanon", /* name */
1518   OPTGROUP_LOOP, /* optinfo_flags */
1519   TV_TREE_LOOP_IVCANON, /* tv_id */
1520   ( PROP_cfg | PROP_ssa ), /* properties_required */
1521   0, /* properties_provided */
1522   0, /* properties_destroyed */
1523   0, /* todo_flags_start */
1524   0, /* todo_flags_finish */
1525 };
1526 
1527 class pass_iv_canon : public gimple_opt_pass
1528 {
1529 public:
1530   pass_iv_canon (gcc::context *ctxt)
1531     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1532   {}
1533 
1534   /* opt_pass methods: */
1535   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1536   virtual unsigned int execute (function *fun);
1537 
1538 }; // class pass_iv_canon
1539 
1540 unsigned int
1541 pass_iv_canon::execute (function *fun)
1542 {
1543   if (number_of_loops (fun) <= 1)
1544     return 0;
1545 
1546   return canonicalize_induction_variables ();
1547 }
1548 
1549 } // anon namespace
1550 
1551 gimple_opt_pass *
1552 make_pass_iv_canon (gcc::context *ctxt)
1553 {
1554   return new pass_iv_canon (ctxt);
1555 }
1556 
1557 /* Complete unrolling of loops.  */
1558 
1559 namespace {
1560 
1561 const pass_data pass_data_complete_unroll =
1562 {
1563   GIMPLE_PASS, /* type */
1564   "cunroll", /* name */
1565   OPTGROUP_LOOP, /* optinfo_flags */
1566   TV_COMPLETE_UNROLL, /* tv_id */
1567   ( PROP_cfg | PROP_ssa ), /* properties_required */
1568   0, /* properties_provided */
1569   0, /* properties_destroyed */
1570   0, /* todo_flags_start */
1571   0, /* todo_flags_finish */
1572 };
1573 
1574 class pass_complete_unroll : public gimple_opt_pass
1575 {
1576 public:
1577   pass_complete_unroll (gcc::context *ctxt)
1578     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1579   {}
1580 
1581   /* opt_pass methods: */
1582   virtual unsigned int execute (function *);
1583 
1584 }; // class pass_complete_unroll
1585 
1586 unsigned int
1587 pass_complete_unroll::execute (function *fun)
1588 {
1589   if (number_of_loops (fun) <= 1)
1590     return 0;
1591 
1592   /* If we ever decide to run loop peeling more than once, we will need to
1593      track loops already peeled in loop structures themselves to avoid
1594      re-peeling the same loop multiple times.  */
1595   if (flag_peel_loops)
1596     peeled_loops = BITMAP_ALLOC (NULL);
1597   unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1598 						   || flag_peel_loops
1599 						   || optimize >= 3, true);
1600   if (peeled_loops)
1601     {
1602       BITMAP_FREE (peeled_loops);
1603       peeled_loops = NULL;
1604     }
1605   return val;
1606 }
1607 
1608 } // anon namespace
1609 
1610 gimple_opt_pass *
1611 make_pass_complete_unroll (gcc::context *ctxt)
1612 {
1613   return new pass_complete_unroll (ctxt);
1614 }
1615 
1616 /* Complete unrolling of inner loops.  */
1617 
1618 namespace {
1619 
1620 const pass_data pass_data_complete_unrolli =
1621 {
1622   GIMPLE_PASS, /* type */
1623   "cunrolli", /* name */
1624   OPTGROUP_LOOP, /* optinfo_flags */
1625   TV_COMPLETE_UNROLL, /* tv_id */
1626   ( PROP_cfg | PROP_ssa ), /* properties_required */
1627   0, /* properties_provided */
1628   0, /* properties_destroyed */
1629   0, /* todo_flags_start */
1630   0, /* todo_flags_finish */
1631 };
1632 
1633 class pass_complete_unrolli : public gimple_opt_pass
1634 {
1635 public:
1636   pass_complete_unrolli (gcc::context *ctxt)
1637     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1638   {}
1639 
1640   /* opt_pass methods: */
1641   virtual bool gate (function *) { return optimize >= 2; }
1642   virtual unsigned int execute (function *);
1643 
1644 }; // class pass_complete_unrolli
1645 
1646 unsigned int
1647 pass_complete_unrolli::execute (function *fun)
1648 {
1649   unsigned ret = 0;
1650 
1651   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1652   if (number_of_loops (fun) > 1)
1653     {
1654       scev_initialize ();
1655       ret = tree_unroll_loops_completely (optimize >= 3, false);
1656       scev_finalize ();
1657     }
1658   loop_optimizer_finalize ();
1659 
1660   return ret;
1661 }
1662 
1663 } // anon namespace
1664 
1665 gimple_opt_pass *
1666 make_pass_complete_unrolli (gcc::context *ctxt)
1667 {
1668   return new pass_complete_unrolli (ctxt);
1669 }
1670 
1671 
1672