1 /* Induction variable canonicalization.
2    Copyright (C) 2004, 2005, 2007, 2008, 2010
3    Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass detects the loops that iterate a constant number of times,
22    adds a canonical induction variable (step -1, tested against 0)
23    and replaces the exit test.  This enables the less powerful rtl
24    level analysis to use this information.
25 
26    This might spoil the code in some cases (by increasing register pressure).
27    Note that in the case the new variable is not needed, ivopts will get rid
28    of it, so it might only be a problem when there are no other linear induction
29    variables.  In that case the created optimization possibilities are likely
30    to pay up.
31 
32    Additionally in case we detect that it is beneficial to unroll the
33    loop completely, we do it right here to expose the optimization
34    possibilities to the following passes.  */
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "tm_p.h"
42 #include "basic-block.h"
43 #include "tree-pretty-print.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-flow.h"
46 #include "tree-dump.h"
47 #include "cfgloop.h"
48 #include "tree-pass.h"
49 #include "tree-chrec.h"
50 #include "tree-scalar-evolution.h"
51 #include "params.h"
52 #include "flags.h"
53 #include "tree-inline.h"
54 #include "target.h"
55 
56 /* Specifies types of loops that may be unrolled.  */
57 
58 enum unroll_level
59 {
60   UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
61 			   iteration.  */
62   UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
63 			   of code size.  */
64   UL_ALL		/* All suitable loops.  */
65 };
66 
67 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
68    is the exit edge whose condition is replaced.  */
69 
70 static void
71 create_canonical_iv (struct loop *loop, edge exit, tree niter)
72 {
73   edge in;
74   tree type, var;
75   gimple cond;
76   gimple_stmt_iterator incr_at;
77   enum tree_code cmp;
78 
79   if (dump_file && (dump_flags & TDF_DETAILS))
80     {
81       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
82       print_generic_expr (dump_file, niter, TDF_SLIM);
83       fprintf (dump_file, " iterations.\n");
84     }
85 
86   cond = last_stmt (exit->src);
87   in = EDGE_SUCC (exit->src, 0);
88   if (in == exit)
89     in = EDGE_SUCC (exit->src, 1);
90 
91   /* Note that we do not need to worry about overflows, since
92      type of niter is always unsigned and all comparisons are
93      just for equality/nonequality -- i.e. everything works
94      with a modulo arithmetics.  */
95 
96   type = TREE_TYPE (niter);
97   niter = fold_build2 (PLUS_EXPR, type,
98 		       niter,
99 		       build_int_cst (type, 1));
100   incr_at = gsi_last_bb (in->src);
101   create_iv (niter,
102 	     build_int_cst (type, -1),
103 	     NULL_TREE, loop,
104 	     &incr_at, false, NULL, &var);
105 
106   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
107   gimple_cond_set_code (cond, cmp);
108   gimple_cond_set_lhs (cond, var);
109   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
110   update_stmt (cond);
111 }
112 
113 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
114 
115 unsigned
116 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
117 {
118   basic_block *body = get_loop_body (loop);
119   gimple_stmt_iterator gsi;
120   unsigned size = 0, i;
121 
122   for (i = 0; i < loop->num_nodes; i++)
123     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
124       size += estimate_num_insns (gsi_stmt (gsi), weights);
125   free (body);
126 
127   return size;
128 }
129 
130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
131 struct loop_size
132 {
133   /* Number of instructions in the loop.  */
134   int overall;
135 
136   /* Number of instructions that will be likely optimized out in
137      peeled iterations of loop  (i.e. computation based on induction
138      variable where induction variable starts at known constant.)  */
139   int eliminated_by_peeling;
140 
141   /* Same statistics for last iteration of loop: it is smaller because
142      instructions after exit are not executed.  */
143   int last_iteration;
144   int last_iteration_eliminated_by_peeling;
145 };
146 
147 /* Return true if OP in STMT will be constant after peeling LOOP.  */
148 
149 static bool
150 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
151 {
152   affine_iv iv;
153 
154   if (is_gimple_min_invariant (op))
155     return true;
156 
157   /* We can still fold accesses to constant arrays when index is known.  */
158   if (TREE_CODE (op) != SSA_NAME)
159     {
160       tree base = op;
161 
162       /* First make fast look if we see constant array inside.  */
163       while (handled_component_p (base))
164 	base = TREE_OPERAND (base, 0);
165       if ((DECL_P (base) == VAR_DECL
166 	   && const_value_known_p (base))
167 	  || CONSTANT_CLASS_P (base))
168 	{
169 	  /* If so, see if we understand all the indices.  */
170 	  base = op;
171 	  while (handled_component_p (base))
172 	    {
173 	      if (TREE_CODE (base) == ARRAY_REF
174 		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
175 		return false;
176 	      base = TREE_OPERAND (base, 0);
177 	    }
178 	  return true;
179 	}
180       return false;
181     }
182 
183   /* Induction variables are constants.  */
184   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
185     return false;
186   if (!is_gimple_min_invariant (iv.base))
187     return false;
188   if (!is_gimple_min_invariant (iv.step))
189     return false;
190   return true;
191 }
192 
193 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
194    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
195 
196 static void
197 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
198 {
199   basic_block *body = get_loop_body (loop);
200   gimple_stmt_iterator gsi;
201   unsigned int i;
202   bool after_exit;
203 
204   size->overall = 0;
205   size->eliminated_by_peeling = 0;
206   size->last_iteration = 0;
207   size->last_iteration_eliminated_by_peeling = 0;
208 
209   if (dump_file && (dump_flags & TDF_DETAILS))
210     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
211   for (i = 0; i < loop->num_nodes; i++)
212     {
213       if (exit && body[i] != exit->src
214 	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
215 	after_exit = true;
216       else
217 	after_exit = false;
218       if (dump_file && (dump_flags & TDF_DETAILS))
219 	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
220 
221       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
222 	{
223 	  gimple stmt = gsi_stmt (gsi);
224 	  int num = estimate_num_insns (stmt, &eni_size_weights);
225 	  bool likely_eliminated = false;
226 
227 	  if (dump_file && (dump_flags & TDF_DETAILS))
228 	    {
229 	      fprintf (dump_file, "  size: %3i ", num);
230 	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
231 	    }
232 
233 	  /* Look for reasons why we might optimize this stmt away. */
234 
235 	  /* Exit conditional.  */
236 	  if (body[i] == exit->src && stmt == last_stmt (exit->src))
237 	    {
238 	      if (dump_file && (dump_flags & TDF_DETAILS))
239 	        fprintf (dump_file, "   Exit condition will be eliminated.\n");
240 	      likely_eliminated = true;
241 	    }
242 	  /* Sets of IV variables  */
243 	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
244 	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
245 	    {
246 	      if (dump_file && (dump_flags & TDF_DETAILS))
247 	        fprintf (dump_file, "   Induction variable computation will"
248 			 " be folded away.\n");
249 	      likely_eliminated = true;
250 	    }
251 	  /* Assignments of IV variables.  */
252 	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
253 		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
254 		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
255 		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
256 		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
257 		       				  stmt, loop)))
258 	    {
259 	      if (dump_file && (dump_flags & TDF_DETAILS))
260 	        fprintf (dump_file, "   Constant expression will be folded away.\n");
261 	      likely_eliminated = true;
262 	    }
263 	  /* Conditionals.  */
264 	  else if (gimple_code (stmt) == GIMPLE_COND
265 		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
266 		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
267 	    {
268 	      if (dump_file && (dump_flags & TDF_DETAILS))
269 	        fprintf (dump_file, "   Constant conditional.\n");
270 	      likely_eliminated = true;
271 	    }
272 
273 	  size->overall += num;
274 	  if (likely_eliminated)
275 	    size->eliminated_by_peeling += num;
276 	  if (!after_exit)
277 	    {
278 	      size->last_iteration += num;
279 	      if (likely_eliminated)
280 		size->last_iteration_eliminated_by_peeling += num;
281 	    }
282 	}
283     }
284   if (dump_file && (dump_flags & TDF_DETAILS))
285     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
286     	     size->eliminated_by_peeling, size->last_iteration,
287 	     size->last_iteration_eliminated_by_peeling);
288 
289   free (body);
290 }
291 
292 /* Estimate number of insns of completely unrolled loop.
293    It is (NUNROLL + 1) * size of loop body with taking into account
294    the fact that in last copy everything after exit conditional
295    is dead and that some instructions will be eliminated after
296    peeling.
297 
298    Loop body is likely going to simplify futher, this is difficult
299    to guess, we just decrease the result by 1/3.  */
300 
301 static unsigned HOST_WIDE_INT
302 estimated_unrolled_size (struct loop_size *size,
303 			 unsigned HOST_WIDE_INT nunroll)
304 {
305   HOST_WIDE_INT unr_insns = ((nunroll)
306   			     * (HOST_WIDE_INT) (size->overall
307 			     			- size->eliminated_by_peeling));
308   if (!nunroll)
309     unr_insns = 0;
310   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
311 
312   unr_insns = unr_insns * 2 / 3;
313   if (unr_insns <= 0)
314     unr_insns = 1;
315 
316   return unr_insns;
317 }
318 
319 /* Tries to unroll LOOP completely, i.e. NITER times.
320    UL determines which loops we are allowed to unroll.
321    EXIT is the exit of the loop that should be eliminated.  */
322 
323 static bool
324 try_unroll_loop_completely (struct loop *loop,
325 			    edge exit, tree niter,
326 			    enum unroll_level ul)
327 {
328   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
329   gimple cond;
330   struct loop_size size;
331 
332   if (loop->inner)
333     return false;
334 
335   if (!host_integerp (niter, 1))
336     return false;
337   n_unroll = tree_low_cst (niter, 1);
338 
339   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
340   if (n_unroll > max_unroll)
341     return false;
342 
343   if (n_unroll)
344     {
345       if (ul == UL_SINGLE_ITER)
346 	return false;
347 
348       tree_estimate_loop_size (loop, exit, &size);
349       ninsns = size.overall;
350 
351       unr_insns = estimated_unrolled_size (&size, n_unroll);
352       if (dump_file && (dump_flags & TDF_DETAILS))
353 	{
354 	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
355 	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
356 		   (int) unr_insns);
357 	}
358 
359       if (unr_insns > ninsns
360 	  && (unr_insns
361 	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
362 	{
363 	  if (dump_file && (dump_flags & TDF_DETAILS))
364 	    fprintf (dump_file, "Not unrolling loop %d "
365 		     "(--param max-completely-peeled-insns limit reached).\n",
366 		     loop->num);
367 	  return false;
368 	}
369 
370       if (ul == UL_NO_GROWTH
371 	  && unr_insns > ninsns)
372 	{
373 	  if (dump_file && (dump_flags & TDF_DETAILS))
374 	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
375 	  return false;
376 	}
377     }
378 
379   if (n_unroll)
380     {
381       sbitmap wont_exit;
382       edge e;
383       unsigned i;
384       VEC (edge, heap) *to_remove = NULL;
385 
386       initialize_original_copy_tables ();
387       wont_exit = sbitmap_alloc (n_unroll + 1);
388       sbitmap_ones (wont_exit);
389       RESET_BIT (wont_exit, 0);
390 
391       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
392 						 n_unroll, wont_exit,
393 						 exit, &to_remove,
394 						 DLTHE_FLAG_UPDATE_FREQ
395 						 | DLTHE_FLAG_COMPLETTE_PEEL))
396 	{
397           free_original_copy_tables ();
398 	  free (wont_exit);
399 	  return false;
400 	}
401 
402       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
403 	{
404 	  bool ok = remove_path (e);
405 	  gcc_assert (ok);
406 	}
407 
408       VEC_free (edge, heap, to_remove);
409       free (wont_exit);
410       free_original_copy_tables ();
411     }
412 
413   cond = last_stmt (exit->src);
414   if (exit->flags & EDGE_TRUE_VALUE)
415     gimple_cond_make_true (cond);
416   else
417     gimple_cond_make_false (cond);
418   update_stmt (cond);
419   update_ssa (TODO_update_ssa);
420 
421   if (dump_file && (dump_flags & TDF_DETAILS))
422     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
423 
424   return true;
425 }
426 
427 /* Adds a canonical induction variable to LOOP if suitable.
428    CREATE_IV is true if we may create a new iv.  UL determines
429    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
430    to determine the number of iterations of a loop by direct evaluation.
431    Returns true if cfg is changed.  */
432 
433 static bool
434 canonicalize_loop_induction_variables (struct loop *loop,
435 				       bool create_iv, enum unroll_level ul,
436 				       bool try_eval)
437 {
438   edge exit = NULL;
439   tree niter;
440 
441   niter = number_of_latch_executions (loop);
442   if (TREE_CODE (niter) == INTEGER_CST)
443     {
444       exit = single_exit (loop);
445       if (!just_once_each_iteration_p (loop, exit->src))
446 	return false;
447     }
448   else
449     {
450       /* If the loop has more than one exit, try checking all of them
451 	 for # of iterations determinable through scev.  */
452       if (!single_exit (loop))
453 	niter = find_loop_niter (loop, &exit);
454 
455       /* Finally if everything else fails, try brute force evaluation.  */
456       if (try_eval
457 	  && (chrec_contains_undetermined (niter)
458 	      || TREE_CODE (niter) != INTEGER_CST))
459 	niter = find_loop_niter_by_eval (loop, &exit);
460 
461       if (chrec_contains_undetermined (niter)
462 	  || TREE_CODE (niter) != INTEGER_CST)
463 	return false;
464     }
465 
466   if (dump_file && (dump_flags & TDF_DETAILS))
467     {
468       fprintf (dump_file, "Loop %d iterates ", loop->num);
469       print_generic_expr (dump_file, niter, TDF_SLIM);
470       fprintf (dump_file, " times.\n");
471     }
472 
473   if (try_unroll_loop_completely (loop, exit, niter, ul))
474     return true;
475 
476   if (create_iv)
477     create_canonical_iv (loop, exit, niter);
478 
479   return false;
480 }
481 
482 /* The main entry point of the pass.  Adds canonical induction variables
483    to the suitable loops.  */
484 
485 unsigned int
486 canonicalize_induction_variables (void)
487 {
488   loop_iterator li;
489   struct loop *loop;
490   bool changed = false;
491 
492   FOR_EACH_LOOP (li, loop, 0)
493     {
494       changed |= canonicalize_loop_induction_variables (loop,
495 							true, UL_SINGLE_ITER,
496 							true);
497     }
498 
499   /* Clean up the information about numbers of iterations, since brute force
500      evaluation could reveal new information.  */
501   scev_reset ();
502 
503   if (changed)
504     return TODO_cleanup_cfg;
505   return 0;
506 }
507 
508 /* Unroll LOOPS completely if they iterate just few times.  Unless
509    MAY_INCREASE_SIZE is true, perform the unrolling only if the
510    size of the code does not increase.  */
511 
512 unsigned int
513 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
514 {
515   loop_iterator li;
516   struct loop *loop;
517   bool changed;
518   enum unroll_level ul;
519   int iteration = 0;
520 
521   do
522     {
523       changed = false;
524 
525       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
526 	{
527 	  if (may_increase_size && optimize_loop_for_speed_p (loop)
528 	      /* Unroll outermost loops only if asked to do so or they do
529 		 not cause code growth.  */
530 	      && (unroll_outer
531 		  || loop_outer (loop_outer (loop))))
532 	    ul = UL_ALL;
533 	  else
534 	    ul = UL_NO_GROWTH;
535 	  changed |= canonicalize_loop_induction_variables
536 		       (loop, false, ul, !flag_tree_loop_ivcanon);
537 	}
538 
539       if (changed)
540 	{
541 	  /* This will take care of removing completely unrolled loops
542 	     from the loop structures so we can continue unrolling now
543 	     innermost loops.  */
544 	  if (cleanup_tree_cfg ())
545 	    update_ssa (TODO_update_ssa_only_virtuals);
546 
547 	  /* Clean up the information about numbers of iterations, since
548 	     complete unrolling might have invalidated it.  */
549 	  scev_reset ();
550 	}
551     }
552   while (changed
553 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
554 
555   return 0;
556 }
557