/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "emit-rtl.h"
#include "cfgloop.h"
#include "explow.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif
/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark blocks that belong directly to the
   innermost loop containing the whole cycle.

   LOOPS is the loop tree.  */

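/* Each basic block and each loop gets its own vertex in the auxiliary
   graph built by mark_irreducible_loops; these macros compute the vertex
   numbers.  */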
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting loops should lead from the representative of the
	   son of the nearest common ancestor of the loops in which ACT
	   lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* Edge E in graph G is irreducible if it connects two vertices in
	   the same SCC.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower number.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;	/* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

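      /* Weight the block's insn count by its execution frequency relative
	 to the loop header, in fixed point with base BB_FREQ_MAX.  */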
      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

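      /* The expected iteration count is the number of latch executions per
	 entry into the loop, rounded up; if the profile claims the loop is
	 never entered, fall back to a guess based on the latch count.  */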
      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
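  /* Scratch registers and a memory reference, used only to measure the
     cost of register-to-register moves and of spills below.  */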
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

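  /* Assume a few registers are always needed for other purposes; this is
     a heuristic constant used by estimate_reg_pressure_cost below.  */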
  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation more
       accurate for IRA, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Return exit edge if loop has only one exit that is likely
   to be executed at runtime (i.e. it is not an EH edge or an edge
   leading to a noreturn call).  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
	continue;
      /* The constant of 5 is chosen so that noreturn calls are ruled out
	 by this test.  The static branch prediction algorithm will not
	 assign such a low probability to conditionals for usual
	 reasons.  */
      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
	  && ex->probability < 5 && !ex->count)
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }
  exits.release ();
  return found;
}



/* Returns the hot path through LOOP: starting from the header, follow at
   each step the most probable successor edge that stays inside the loop
   and does not lead to a block already on the path.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
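      /* Pick the most probable successor edge that stays inside the loop
	 and leads to a block we have not visited yet.  */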
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}