xref: /netbsd/external/gpl3/gcc/dist/gcc/cfgloopanal.cc (revision f0fbc68b)
/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cfgloop.h"
#include "explow.h"
#include "expr.h"
#include "graphds.h"
#include "sreal.h"
#include "regs.h"
#include "function-abi.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   The loop tree of the current function (CURRENT_LOOPS) is used.  */

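/* Illustration: in the region sketched below the cycle {A, B} can be
   entered both at A and at B, so it has no single header dominating it and
   is not a natural loop; this function flags both blocks and the edges
   A->B and B->A as irreducible.

	 ENTRY
	 /   \
	A <--> B  */
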
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)
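
/* The macros above map basic blocks and loops into the vertex space of the
   auxiliary graph built below, which has
   last_basic_block_for_fn (cfun) + number_of_loops (cfun) vertices:
   block BB is represented by vertex BB->index + 1 and loop L by vertex
   last_basic_block_for_fn (cfun) + L->num, so every block and every
   non-root loop gets a vertex of its own.  */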

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  class loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && dominated_by_p (CDI_DOMINATORS, act, e->dest))
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but start from the same place.

	   Edges exiting loops should start from the representative of that
	   child of the nearest common ancestor of the source and destination
	   loops which contains ACT.  */

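	/* For instance, if ACT lies in a loop L2 nested directly in L1 and E
	   exits into L1, the nearest common ancestor is L1 and its child
	   containing ACT is L2, so the edge is made to start from
	   LOOP_REPR (L2).  A cycle that merely passes through the subloop
	   (entering at its header) therefore runs through LOOP_REPR (L2)
	   rather than through the subloop's member blocks, so those blocks
	   are not marked irreducible.  */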
	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* Edge E in graph G is irreducible if it connects two vertices in
	   the same scc.  */

	/* All edges should lead from a component with higher number to the
	   one with lower one.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1;	/* To avoid division by zero.  */

  return ninsns;
}

/* Counts the number of insns executed on average per iteration of LOOP.  */
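/* For example, under this profile weighting a block containing 10 real
   insns whose execution count is half of the header's count contributes
   5 insns to the average.  */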
int
average_num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

      ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
	{
	  free (bbs);
	  return 1000000;
	}
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}

/* Returns the expected number of iterations of LOOP, according to
   the measured or guessed profile.

   This function attempts to return a "sane" value even if the profile
   information is not good enough to derive one.
   If BY_PROFILE_ONLY is set, this logic is bypassed and the function
   returns -1 in those scenarios.  */

gcov_type
expected_loop_iterations_unbounded (const class loop *loop,
				    bool *read_profile_p,
				    bool by_profile_only)
{
  edge e;
  edge_iterator ei;
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  /* If we have no profile at all, use AVG_LOOP_NITER.  */
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    {
      if (by_profile_only)
	return -1;
      expected = param_avg_loop_niter;
    }
  else if (loop->latch && (loop->latch->count.initialized_p ()
			   || loop->header->count.initialized_p ()))
    {
      profile_count count_in = profile_count::zero (),
		    count_latch = profile_count::zero ();

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count ();
	else
	  count_in += e->count ();

      if (!count_latch.initialized_p ())
	{
	  if (by_profile_only)
	    return -1;
	  expected = param_avg_loop_niter;
	}
      else if (!count_in.nonzero_p ())
	{
	  if (by_profile_only)
	    return -1;
	  expected = count_latch.to_gcov_type () * 2;
	}
      else
	{
	  expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
		      - 1) / count_in.to_gcov_type ();
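	  /* I.e. the ceiling of count_latch / count_in; e.g. with
	     count_in == 100 and count_latch == 900 this yields
	     (900 + 100 - 1) / 100 == 9 expected iterations.  */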
	  if (read_profile_p
	      && count_latch.reliable_p () && count_in.reliable_p ())
	    *read_profile_p = true;
	}
    }
  else
    {
      if (by_profile_only)
	return -1;
      expected = param_avg_loop_niter;
    }

  if (!by_profile_only)
    {
      HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
      if (max != -1 && max < expected)
	return max;
    }

  return expected;
}

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (class loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */
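/* For example, a loop with no subloops has level 0, and a loop whose only
   subloop itself contains no subloops has level 1.  */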

unsigned
get_loop_level (const class loop *loop)
{
  const class loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	/* ??? This is only a rough heuristic.  It doesn't cope well
	   with alternative ABIs, but that's an optimization rather than
	   correctness issue.  */
	if (default_function_abi.clobbers_full_reg_p (i))
	  target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

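      /* Both costs are measured by emitting the corresponding move and
	 spill/reload sequences below and pricing them with seq_cost, once
	 with the size-oriented cost model (speed == 0) and once with the
	 speed-oriented one (speed == 1).  */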
      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */
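/* A worked example of the logic below: with target_avail_regs == 32,
   target_res_regs == 3, CALL_P false, N_OLD == 28 and N_NEW == 4,
   regs_needed is 32; the reserve no longer fits (32 + 3 > 32) but we are
   not yet spilling (32 <= 32), so the estimate is
   target_reg_cost[speed] * 4 (possibly halved for IRA regional
   allocation).  */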

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Returns the exit edge if LOOP has only one exit that is likely
   to be executed at run time (i.e. it is not an EH edge and does not
   lead to a noreturn call).  */

edge
single_likely_exit (class loop *loop, const vec<edge> &exits)
{
  edge found = single_exit (loop);
  unsigned i;
  edge ex;

  if (found)
    return found;
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
	  /* We want to rule out paths to noreturns but not low probabilities
	     resulting from adjustments or combining.
	     FIXME: once we have better quality tracking, make this more
	     robust.  */
	  || ex->probability <= profile_probability::very_unlikely ())
	continue;
      if (!found)
	found = ex;
      else
	return NULL;
    }
  return found;
}


/* Returns the hot path through LOOP: starting from the header, repeatedly
   follow the most probable successor edge that stays inside the loop and
   leads to a block not yet visited, stopping when no such edge remains or
   when it would return to the header.  */

auto_vec<basic_block>
get_loop_hot_path (const class loop *loop)
{
  basic_block bb = loop->header;
  auto_vec<basic_block> path;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}