/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cfgloop.h"
#include "explow.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"
#include "sreal.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to do
   this for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)
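
/* The auxiliary graph built below has one vertex for every basic block and
   one for every loop; BB_REPR and LOOP_REPR map a block or a loop to its
   vertex number.  */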

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting loops should lead from the representative of the
	   child of the nearest common ancestor of the loops involved --
	   the child in which ACT lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* Edge E in graph G is irreducible if it connects two vertices in
	   the same SCC.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower number.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;	/* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

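      /* Weight the block's insn count by how often the block executes
	 relative to the loop header, giving insns per iteration.  */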
      ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
	return 100000;
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.

   This function attempts to return a "sane" value even if the profile
   information is not good enough to derive one.
   If BY_PROFILE_ONLY is set, this logic is bypassed and the function
   returns -1 in those scenarios.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop,
				    bool *read_profile_p,
				    bool by_profile_only)
{
  edge e;
  edge_iterator ei;
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  /* If we have no profile at all, use AVG_LOOP_NITER.  */
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    {
      if (by_profile_only)
	return -1;
      expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }
  else if (loop->latch && (loop->latch->count.initialized_p ()
			   || loop->header->count.initialized_p ()))
    {
      profile_count count_in = profile_count::zero (),
		    count_latch = profile_count::zero ();

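      /* Sum the counts of the edges entering the header, keeping the latch
	 edge separate from the edges entering the loop from outside.  */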
      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count ();
	else
	  count_in += e->count ();

      if (!count_latch.initialized_p ())
	{
          if (by_profile_only)
	    return -1;
	  expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
	}
      else if (!count_in.nonzero_p ())
	{
          if (by_profile_only)
	    return -1;
	  expected = count_latch.to_gcov_type () * 2;
	}
      else
	{
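	  /* count_latch / count_in is the average number of latch
	     traversals (i.e. completed iterations) per entry into the
	     loop; round the division up.  */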
	  expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
		      - 1) / count_in.to_gcov_type ();
	  if (read_profile_p
	      && count_latch.reliable_p () && count_in.reliable_p ())
	    *read_profile_p = true;
	}
    }
  else
    {
      if (by_profile_only)
	return -1;
      expected = PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  if (!by_profile_only)
    {
      HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
      if (max != -1 && max < expected)
        return max;
    }

  return expected;
}

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

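  /* Reserve a few registers for intermediate results of expressions; see the
     use of target_res_regs in estimate_reg_pressure_cost below.  */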
  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
     {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

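  /* With only the root node in the loop tree there are no loop exits
     to mark.  */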
  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Return the exit edge if LOOP has only one exit that is likely
   to be executed at run time (i.e. it is not an EH edge or an edge
   leading to a noreturn call).  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

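  /* If a single exit is already recorded for LOOP, it is the exit.  */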
  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
	  /* We want to rule out paths to noreturns but not low probabilities
	     resulting from adjustments or combining.
	     FIXME: once we have better quality tracking, make this more
	     robust.  */
	  || ex->probability <= profile_probability::very_unlikely ())
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }
  exits.release ();
  return found;
}


/* Returns the hot path through LOOP: the sequence of blocks starting at the
   loop header, where each following block is the most probable successor of
   the previous one that stays inside the loop and has not been visited yet.
   The walk stops when no such successor exists.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
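      /* Choose the most probable successor edge that stays in LOOP and
	 leads to a block not yet on the path.  */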
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}