1 /* Loop manipulation code for GNU compiler.
2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfganal.h"
29 #include "cfgloop.h"
30 #include "gimple-iterator.h"
31 #include "gimplify-me.h"
32 #include "tree-ssa-loop-manip.h"
33 #include "dumpfile.h"
34 
35 static void copy_loops_to (class loop **, int,
36 			   class loop *);
37 static void loop_redirect_edge (edge, basic_block);
38 static void remove_bbs (basic_block *, int);
39 static bool rpe_enum_p (const_basic_block, const void *);
40 static int find_path (edge, basic_block **);
41 static void fix_loop_placements (class loop *, bool *);
42 static bool fix_bb_placement (basic_block);
43 static void fix_bb_placements (basic_block, bool *, bitmap);
44 
45 /* Checks whether basic block BB is dominated by DATA.  */
46 static bool
rpe_enum_p(const_basic_block bb,const void * data)47 rpe_enum_p (const_basic_block bb, const void *data)
48 {
49   return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
50 }
51 
52 /* Remove basic blocks BBS.  NBBS is the number of the basic blocks.  */
53 
54 static void
remove_bbs(basic_block * bbs,int nbbs)55 remove_bbs (basic_block *bbs, int nbbs)
56 {
57   int i;
58 
59   for (i = 0; i < nbbs; i++)
60     delete_basic_block (bbs[i]);
61 }
62 
63 /* Find path -- i.e. the basic blocks dominated by edge E and put them
64    into array BBS, that will be allocated large enough to contain them.
65    E->dest must have exactly one predecessor for this to work (it is
66    easy to achieve and we do not put it here because we do not want to
67    alter anything by this function).  The number of basic blocks in the
68    path is returned.  */
69 static int
find_path(edge e,basic_block ** bbs)70 find_path (edge e, basic_block **bbs)
71 {
72   gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
73 
74   /* Find bbs in the path.  */
75   *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
76   return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
77 			     n_basic_blocks_for_fn (cfun), e->dest);
78 }
79 
80 /* Fix placement of basic block BB inside loop hierarchy --
81    Let L be a loop to that BB belongs.  Then every successor of BB must either
82      1) belong to some superloop of loop L, or
83      2) be a header of loop K such that K->outer is superloop of L
84    Returns true if we had to move BB into other loop to enforce this condition,
85    false if the placement of BB was already correct (provided that placements
86    of its successors are correct).  */
87 static bool
fix_bb_placement(basic_block bb)88 fix_bb_placement (basic_block bb)
89 {
90   edge e;
91   edge_iterator ei;
92   class loop *loop = current_loops->tree_root, *act;
93 
94   FOR_EACH_EDGE (e, ei, bb->succs)
95     {
96       if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
97 	continue;
98 
99       act = e->dest->loop_father;
100       if (act->header == e->dest)
101 	act = loop_outer (act);
102 
103       if (flow_loop_nested_p (loop, act))
104 	loop = act;
105     }
106 
107   if (loop == bb->loop_father)
108     return false;
109 
110   remove_bb_from_loops (bb);
111   add_bb_to_loop (bb, loop);
112 
113   return true;
114 }
115 
116 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
117    of LOOP to that leads at least one exit edge of LOOP, and set it
118    as the immediate superloop of LOOP.  Return true if the immediate superloop
119    of LOOP changed.
120 
121    IRRED_INVALIDATED is set to true if a change in the loop structures might
122    invalidate the information about irreducible regions.  */
123 
124 static bool
fix_loop_placement(class loop * loop,bool * irred_invalidated)125 fix_loop_placement (class loop *loop, bool *irred_invalidated)
126 {
127   unsigned i;
128   edge e;
129   vec<edge> exits = get_loop_exit_edges (loop);
130   class loop *father = current_loops->tree_root, *act;
131   bool ret = false;
132 
133   FOR_EACH_VEC_ELT (exits, i, e)
134     {
135       act = find_common_loop (loop, e->dest->loop_father);
136       if (flow_loop_nested_p (father, act))
137 	father = act;
138     }
139 
140   if (father != loop_outer (loop))
141     {
142       for (act = loop_outer (loop); act != father; act = loop_outer (act))
143 	act->num_nodes -= loop->num_nodes;
144       flow_loop_tree_node_remove (loop);
145       flow_loop_tree_node_add (father, loop);
146 
147       /* The exit edges of LOOP no longer exits its original immediate
148 	 superloops; remove them from the appropriate exit lists.  */
149       FOR_EACH_VEC_ELT (exits, i, e)
150 	{
151 	  /* We may need to recompute irreducible loops.  */
152 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
153 	    *irred_invalidated = true;
154 	  rescan_loop_exit (e, false, false);
155 	}
156 
157       ret = true;
158     }
159 
160   exits.release ();
161   return ret;
162 }
163 
164 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
165    enforce condition stated in description of fix_bb_placement. We
166    start from basic block FROM that had some of its successors removed, so that
167    his placement no longer has to be correct, and iteratively fix placement of
168    its predecessors that may change if placement of FROM changed.  Also fix
169    placement of subloops of FROM->loop_father, that might also be altered due
170    to this change; the condition for them is similar, except that instead of
171    successors we consider edges coming out of the loops.
172 
173    If the changes may invalidate the information about irreducible regions,
174    IRRED_INVALIDATED is set to true.
175 
176    If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
177    changed loop_father are collected there. */
178 
179 static void
fix_bb_placements(basic_block from,bool * irred_invalidated,bitmap loop_closed_ssa_invalidated)180 fix_bb_placements (basic_block from,
181 		   bool *irred_invalidated,
182 		   bitmap loop_closed_ssa_invalidated)
183 {
184   basic_block *queue, *qtop, *qbeg, *qend;
185   class loop *base_loop, *target_loop;
186   edge e;
187 
188   /* We pass through blocks back-reachable from FROM, testing whether some
189      of their successors moved to outer loop.  It may be necessary to
190      iterate several times, but it is finite, as we stop unless we move
191      the basic block up the loop structure.  The whole story is a bit
192      more complicated due to presence of subloops, those are moved using
193      fix_loop_placement.  */
194 
195   base_loop = from->loop_father;
196   /* If we are already in the outermost loop, the basic blocks cannot be moved
197      outside of it.  If FROM is the header of the base loop, it cannot be moved
198      outside of it, either.  In both cases, we can end now.  */
199   if (base_loop == current_loops->tree_root
200       || from == base_loop->header)
201     return;
202 
203   auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
204   bitmap_clear (in_queue);
205   bitmap_set_bit (in_queue, from->index);
206   /* Prevent us from going out of the base_loop.  */
207   bitmap_set_bit (in_queue, base_loop->header->index);
208 
209   queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
210   qtop = queue + base_loop->num_nodes + 1;
211   qbeg = queue;
212   qend = queue + 1;
213   *qbeg = from;
214 
215   while (qbeg != qend)
216     {
217       edge_iterator ei;
218       from = *qbeg;
219       qbeg++;
220       if (qbeg == qtop)
221 	qbeg = queue;
222       bitmap_clear_bit (in_queue, from->index);
223 
224       if (from->loop_father->header == from)
225 	{
226 	  /* Subloop header, maybe move the loop upward.  */
227 	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
228 	    continue;
229 	  target_loop = loop_outer (from->loop_father);
230 	  if (loop_closed_ssa_invalidated)
231 	    {
232 	      basic_block *bbs = get_loop_body (from->loop_father);
233 	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
234 		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
235 	      free (bbs);
236 	    }
237 	}
238       else
239 	{
240 	  /* Ordinary basic block.  */
241 	  if (!fix_bb_placement (from))
242 	    continue;
243 	  target_loop = from->loop_father;
244 	  if (loop_closed_ssa_invalidated)
245 	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
246 	}
247 
248       FOR_EACH_EDGE (e, ei, from->succs)
249 	{
250 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
251 	    *irred_invalidated = true;
252 	}
253 
254       /* Something has changed, insert predecessors into queue.  */
255       FOR_EACH_EDGE (e, ei, from->preds)
256 	{
257 	  basic_block pred = e->src;
258 	  class loop *nca;
259 
260 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
261 	    *irred_invalidated = true;
262 
263 	  if (bitmap_bit_p (in_queue, pred->index))
264 	    continue;
265 
266 	  /* If it is subloop, then it either was not moved, or
267 	     the path up the loop tree from base_loop do not contain
268 	     it.  */
269 	  nca = find_common_loop (pred->loop_father, base_loop);
270 	  if (pred->loop_father != base_loop
271 	      && (nca == base_loop
272 		  || nca != pred->loop_father))
273 	    pred = pred->loop_father->header;
274 	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
275 	    {
276 	      /* If PRED is already higher in the loop hierarchy than the
277 		 TARGET_LOOP to that we moved FROM, the change of the position
278 		 of FROM does not affect the position of PRED, so there is no
279 		 point in processing it.  */
280 	      continue;
281 	    }
282 
283 	  if (bitmap_bit_p (in_queue, pred->index))
284 	    continue;
285 
286 	  /* Schedule the basic block.  */
287 	  *qend = pred;
288 	  qend++;
289 	  if (qend == qtop)
290 	    qend = queue;
291 	  bitmap_set_bit (in_queue, pred->index);
292 	}
293     }
294   free (queue);
295 }
296 
297 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
298    and update loop structures and dominators.  Return true if we were able
299    to remove the path, false otherwise (and nothing is affected then).  */
300 bool
remove_path(edge e,bool * irred_invalidated,bitmap loop_closed_ssa_invalidated)301 remove_path (edge e, bool *irred_invalidated,
302 	     bitmap loop_closed_ssa_invalidated)
303 {
304   edge ae;
305   basic_block *rem_bbs, *bord_bbs, from, bb;
306   vec<basic_block> dom_bbs;
307   int i, nrem, n_bord_bbs;
308   bool local_irred_invalidated = false;
309   edge_iterator ei;
310   class loop *l, *f;
311 
312   if (! irred_invalidated)
313     irred_invalidated = &local_irred_invalidated;
314 
315   if (!can_remove_branch_p (e))
316     return false;
317 
318   /* Keep track of whether we need to update information about irreducible
319      regions.  This is the case if the removed area is a part of the
320      irreducible region, or if the set of basic blocks that belong to a loop
321      that is inside an irreducible region is changed, or if such a loop is
322      removed.  */
323   if (e->flags & EDGE_IRREDUCIBLE_LOOP)
324     *irred_invalidated = true;
325 
326   /* We need to check whether basic blocks are dominated by the edge
327      e, but we only have basic block dominators.  This is easy to
328      fix -- when e->dest has exactly one predecessor, this corresponds
329      to blocks dominated by e->dest, if not, split the edge.  */
330   if (!single_pred_p (e->dest))
331     e = single_pred_edge (split_edge (e));
332 
333   /* It may happen that by removing path we remove one or more loops
334      we belong to.  In this case first unloop the loops, then proceed
335      normally.   We may assume that e->dest is not a header of any loop,
336      as it now has exactly one predecessor.  */
337   for (l = e->src->loop_father; loop_outer (l); l = f)
338     {
339       f = loop_outer (l);
340       if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
341         unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
342     }
343 
344   /* Identify the path.  */
345   nrem = find_path (e, &rem_bbs);
346 
347   n_bord_bbs = 0;
348   bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
349   auto_sbitmap seen (last_basic_block_for_fn (cfun));
350   bitmap_clear (seen);
351 
352   /* Find "border" hexes -- i.e. those with predecessor in removed path.  */
353   for (i = 0; i < nrem; i++)
354     bitmap_set_bit (seen, rem_bbs[i]->index);
355   if (!*irred_invalidated)
356     FOR_EACH_EDGE (ae, ei, e->src->succs)
357       if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
358 	  && !bitmap_bit_p (seen, ae->dest->index)
359 	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
360 	{
361 	  *irred_invalidated = true;
362 	  break;
363 	}
364 
365   for (i = 0; i < nrem; i++)
366     {
367       FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
368 	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
369 	    && !bitmap_bit_p (seen, ae->dest->index))
370 	  {
371 	    bitmap_set_bit (seen, ae->dest->index);
372 	    bord_bbs[n_bord_bbs++] = ae->dest;
373 
374 	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
375 	      *irred_invalidated = true;
376 	  }
377     }
378 
379   /* Remove the path.  */
380   from = e->src;
381   remove_branch (e);
382   dom_bbs.create (0);
383 
384   /* Cancel loops contained in the path.  */
385   for (i = 0; i < nrem; i++)
386     if (rem_bbs[i]->loop_father->header == rem_bbs[i])
387       cancel_loop_tree (rem_bbs[i]->loop_father);
388 
389   remove_bbs (rem_bbs, nrem);
390   free (rem_bbs);
391 
392   /* Find blocks whose dominators may be affected.  */
393   bitmap_clear (seen);
394   for (i = 0; i < n_bord_bbs; i++)
395     {
396       basic_block ldom;
397 
398       bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
399       if (bitmap_bit_p (seen, bb->index))
400 	continue;
401       bitmap_set_bit (seen, bb->index);
402 
403       for (ldom = first_dom_son (CDI_DOMINATORS, bb);
404 	   ldom;
405 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
406 	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
407 	  dom_bbs.safe_push (ldom);
408     }
409 
410   /* Recount dominators.  */
411   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
412   dom_bbs.release ();
413   free (bord_bbs);
414 
415   /* Fix placements of basic blocks inside loops and the placement of
416      loops in the loop tree.  */
417   fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
418   fix_loop_placements (from->loop_father, irred_invalidated);
419 
420   if (local_irred_invalidated
421       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
422     mark_irreducible_loops ();
423 
424   return true;
425 }
426 
427 /* Creates place for a new LOOP in loops structure of FN.  */
428 
429 void
place_new_loop(struct function * fn,class loop * loop)430 place_new_loop (struct function *fn, class loop *loop)
431 {
432   loop->num = number_of_loops (fn);
433   vec_safe_push (loops_for_fn (fn)->larray, loop);
434 }
435 
436 /* Given LOOP structure with filled header and latch, find the body of the
437    corresponding loop and add it to loops tree.  Insert the LOOP as a son of
438    outer.  */
439 
440 void
add_loop(class loop * loop,class loop * outer)441 add_loop (class loop *loop, class loop *outer)
442 {
443   basic_block *bbs;
444   int i, n;
445   class loop *subloop;
446   edge e;
447   edge_iterator ei;
448 
449   /* Add it to loop structure.  */
450   place_new_loop (cfun, loop);
451   flow_loop_tree_node_add (outer, loop);
452 
453   /* Find its nodes.  */
454   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
455   n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
456 
457   for (i = 0; i < n; i++)
458     {
459       if (bbs[i]->loop_father == outer)
460 	{
461 	  remove_bb_from_loops (bbs[i]);
462 	  add_bb_to_loop (bbs[i], loop);
463 	  continue;
464 	}
465 
466       loop->num_nodes++;
467 
468       /* If we find a direct subloop of OUTER, move it to LOOP.  */
469       subloop = bbs[i]->loop_father;
470       if (loop_outer (subloop) == outer
471 	  && subloop->header == bbs[i])
472 	{
473 	  flow_loop_tree_node_remove (subloop);
474 	  flow_loop_tree_node_add (loop, subloop);
475 	}
476     }
477 
478   /* Update the information about loop exit edges.  */
479   for (i = 0; i < n; i++)
480     {
481       FOR_EACH_EDGE (e, ei, bbs[i]->succs)
482 	{
483 	  rescan_loop_exit (e, false, false);
484 	}
485     }
486 
487   free (bbs);
488 }
489 
490 /* Scale profile of loop by P.  */
491 
492 void
scale_loop_frequencies(class loop * loop,profile_probability p)493 scale_loop_frequencies (class loop *loop, profile_probability p)
494 {
495   basic_block *bbs;
496 
497   bbs = get_loop_body (loop);
498   scale_bbs_frequencies (bbs, loop->num_nodes, p);
499   free (bbs);
500 }
501 
502 /* Scale profile in LOOP by P.
503    If ITERATION_BOUND is non-zero, scale even further if loop is predicted
504    to iterate too many times.
505    Before caling this function, preheader block profile should be already
506    scaled to final count.  This is necessary because loop iterations are
507    determined by comparing header edge count to latch ege count and thus
508    they need to be scaled synchronously.  */
509 
510 void
scale_loop_profile(class loop * loop,profile_probability p,gcov_type iteration_bound)511 scale_loop_profile (class loop *loop, profile_probability p,
512 		    gcov_type iteration_bound)
513 {
514   edge e, preheader_e;
515   edge_iterator ei;
516 
517   if (dump_file && (dump_flags & TDF_DETAILS))
518     {
519       fprintf (dump_file, ";; Scaling loop %i with scale ",
520 	       loop->num);
521       p.dump (dump_file);
522       fprintf (dump_file, " bounding iterations to %i\n",
523 	       (int)iteration_bound);
524     }
525 
526   /* Scale the probabilities.  */
527   scale_loop_frequencies (loop, p);
528 
529   if (iteration_bound == 0)
530     return;
531 
532   gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true);
533 
534   if (dump_file && (dump_flags & TDF_DETAILS))
535     {
536       fprintf (dump_file, ";; guessed iterations after scaling %i\n",
537 	       (int)iterations);
538     }
539 
540   /* See if loop is predicted to iterate too many times.  */
541   if (iterations <= iteration_bound)
542     return;
543 
544   preheader_e = loop_preheader_edge (loop);
545 
546   /* We could handle also loops without preheaders, but bounding is
547      currently used only by optimizers that have preheaders constructed.  */
548   gcc_checking_assert (preheader_e);
549   profile_count count_in = preheader_e->count ();
550 
551   if (count_in > profile_count::zero ()
552       && loop->header->count.initialized_p ())
553     {
554       profile_count count_delta = profile_count::zero ();
555 
556       e = single_exit (loop);
557       if (e)
558 	{
559 	  edge other_e;
560 	  FOR_EACH_EDGE (other_e, ei, e->src->succs)
561 	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
562 		&& e != other_e)
563 	      break;
564 
565 	  /* Probability of exit must be 1/iterations.  */
566 	  count_delta = e->count ();
567 	  e->probability = profile_probability::always ()
568 				    .apply_scale (1, iteration_bound);
569 	  other_e->probability = e->probability.invert ();
570 
571 	  /* In code below we only handle the following two updates.  */
572 	  if (other_e->dest != loop->header
573 	      && other_e->dest != loop->latch
574 	      && (dump_file && (dump_flags & TDF_DETAILS)))
575 	    {
576 	      fprintf (dump_file, ";; giving up on update of paths from "
577 		       "exit condition to latch\n");
578 	    }
579 	}
580       else
581         if (dump_file && (dump_flags & TDF_DETAILS))
582 	  fprintf (dump_file, ";; Loop has multiple exit edges; "
583 	      		      "giving up on exit condition update\n");
584 
585       /* Roughly speaking we want to reduce the loop body profile by the
586 	 difference of loop iterations.  We however can do better if
587 	 we look at the actual profile, if it is available.  */
588       p = profile_probability::always ();
589 
590       count_in = count_in.apply_scale (iteration_bound, 1);
591       p = count_in.probability_in (loop->header->count);
592       if (!(p > profile_probability::never ()))
593 	p = profile_probability::very_unlikely ();
594 
595       if (p == profile_probability::always ()
596 	  || !p.initialized_p ())
597 	return;
598 
599       /* If latch exists, change its count, since we changed
600 	 probability of exit.  Theoretically we should update everything from
601 	 source of exit edge to latch, but for vectorizer this is enough.  */
602       if (loop->latch && loop->latch != e->src)
603 	loop->latch->count += count_delta;
604 
605       /* Scale the probabilities.  */
606       scale_loop_frequencies (loop, p);
607 
608       /* Change latch's count back.  */
609       if (loop->latch && loop->latch != e->src)
610 	loop->latch->count -= count_delta;
611 
612       if (dump_file && (dump_flags & TDF_DETAILS))
613 	fprintf (dump_file, ";; guessed iterations are now %i\n",
614 		 (int)expected_loop_iterations_unbounded (loop, NULL, true));
615     }
616 }
617 
618 /* Recompute dominance information for basic blocks outside LOOP.  */
619 
620 static void
update_dominators_in_loop(class loop * loop)621 update_dominators_in_loop (class loop *loop)
622 {
623   vec<basic_block> dom_bbs = vNULL;
624   basic_block *body;
625   unsigned i;
626 
627   auto_sbitmap seen (last_basic_block_for_fn (cfun));
628   bitmap_clear (seen);
629   body = get_loop_body (loop);
630 
631   for (i = 0; i < loop->num_nodes; i++)
632     bitmap_set_bit (seen, body[i]->index);
633 
634   for (i = 0; i < loop->num_nodes; i++)
635     {
636       basic_block ldom;
637 
638       for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
639 	   ldom;
640 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
641 	if (!bitmap_bit_p (seen, ldom->index))
642 	  {
643 	    bitmap_set_bit (seen, ldom->index);
644 	    dom_bbs.safe_push (ldom);
645 	  }
646     }
647 
648   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
649   free (body);
650   dom_bbs.release ();
651 }
652 
653 /* Creates an if region as shown above. CONDITION is used to create
654    the test for the if.
655 
656    |
657    |     -------------                 -------------
658    |     |  pred_bb  |                 |  pred_bb  |
659    |     -------------                 -------------
660    |           |                             |
661    |           |                             | ENTRY_EDGE
662    |           | ENTRY_EDGE                  V
663    |           |             ====>     -------------
664    |           |                       |  cond_bb  |
665    |           |                       | CONDITION |
666    |           |                       -------------
667    |           V                        /         \
668    |     -------------         e_false /           \ e_true
669    |     |  succ_bb  |                V             V
670    |     -------------         -----------       -----------
671    |                           | false_bb |      | true_bb |
672    |                           -----------       -----------
673    |                                   \           /
674    |                                    \         /
675    |                                     V       V
676    |                                   -------------
677    |                                   |  join_bb  |
678    |                                   -------------
679    |                                         | exit_edge (result)
680    |                                         V
681    |                                    -----------
682    |                                    | succ_bb |
683    |                                    -----------
684    |
685  */
686 
687 edge
create_empty_if_region_on_edge(edge entry_edge,tree condition)688 create_empty_if_region_on_edge (edge entry_edge, tree condition)
689 {
690 
691   basic_block cond_bb, true_bb, false_bb, join_bb;
692   edge e_true, e_false, exit_edge;
693   gcond *cond_stmt;
694   tree simple_cond;
695   gimple_stmt_iterator gsi;
696 
697   cond_bb = split_edge (entry_edge);
698 
699   /* Insert condition in cond_bb.  */
700   gsi = gsi_last_bb (cond_bb);
701   simple_cond =
702     force_gimple_operand_gsi (&gsi, condition, true, NULL,
703 			      false, GSI_NEW_STMT);
704   cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
705   gsi = gsi_last_bb (cond_bb);
706   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
707 
708   join_bb = split_edge (single_succ_edge (cond_bb));
709 
710   e_true = single_succ_edge (cond_bb);
711   true_bb = split_edge (e_true);
712 
713   e_false = make_edge (cond_bb, join_bb, 0);
714   false_bb = split_edge (e_false);
715 
716   e_true->flags &= ~EDGE_FALLTHRU;
717   e_true->flags |= EDGE_TRUE_VALUE;
718   e_false->flags &= ~EDGE_FALLTHRU;
719   e_false->flags |= EDGE_FALSE_VALUE;
720 
721   set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
722   set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
723   set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
724   set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
725 
726   exit_edge = single_succ_edge (join_bb);
727 
728   if (single_pred_p (exit_edge->dest))
729     set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
730 
731   return exit_edge;
732 }
733 
734 /* create_empty_loop_on_edge
735    |
736    |    - pred_bb -                   ------ pred_bb ------
737    |   |           |                 | iv0 = initial_value |
738    |    -----|-----                   ---------|-----------
739    |         |                       ______    | entry_edge
740    |         | entry_edge           /      |   |
741    |         |             ====>   |      -V---V- loop_header -------------
742    |         V                     |     | iv_before = phi (iv0, iv_after) |
743    |    - succ_bb -                |      ---|-----------------------------
744    |   |           |               |         |
745    |    -----------                |      ---V--- loop_body ---------------
746    |                               |     | iv_after = iv_before + stride   |
747    |                               |     | if (iv_before < upper_bound)    |
748    |                               |      ---|--------------\--------------
749    |                               |         |               \ exit_e
750    |                               |         V                \
751    |                               |       - loop_latch -      V- succ_bb -
752    |                               |      |              |     |           |
753    |                               |       /-------------       -----------
754    |                                \ ___ /
755 
756    Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
757    that is used before the increment of IV. IV_BEFORE should be used for
758    adding code to the body that uses the IV.  OUTER is the outer loop in
759    which the new loop should be inserted.
760 
761    Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
762    inserted on the loop entry edge.  This implies that this function
763    should be used only when the UPPER_BOUND expression is a loop
764    invariant.  */
765 
766 class loop *
create_empty_loop_on_edge(edge entry_edge,tree initial_value,tree stride,tree upper_bound,tree iv,tree * iv_before,tree * iv_after,class loop * outer)767 create_empty_loop_on_edge (edge entry_edge,
768 			   tree initial_value,
769 			   tree stride, tree upper_bound,
770 			   tree iv,
771 			   tree *iv_before,
772 			   tree *iv_after,
773 			   class loop *outer)
774 {
775   basic_block loop_header, loop_latch, succ_bb, pred_bb;
776   class loop *loop;
777   gimple_stmt_iterator gsi;
778   gimple_seq stmts;
779   gcond *cond_expr;
780   tree exit_test;
781   edge exit_e;
782 
783   gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
784 
785   /* Create header, latch and wire up the loop.  */
786   pred_bb = entry_edge->src;
787   loop_header = split_edge (entry_edge);
788   loop_latch = split_edge (single_succ_edge (loop_header));
789   succ_bb = single_succ (loop_latch);
790   make_edge (loop_header, succ_bb, 0);
791   redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
792 
793   /* Set immediate dominator information.  */
794   set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
795   set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
796   set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
797 
798   /* Initialize a loop structure and put it in a loop hierarchy.  */
799   loop = alloc_loop ();
800   loop->header = loop_header;
801   loop->latch = loop_latch;
802   add_loop (loop, outer);
803 
804   /* TODO: Fix counts.  */
805   scale_loop_frequencies (loop, profile_probability::even ());
806 
807   /* Update dominators.  */
808   update_dominators_in_loop (loop);
809 
810   /* Modify edge flags.  */
811   exit_e = single_exit (loop);
812   exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
813   single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
814 
815   /* Construct IV code in loop.  */
816   initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
817   if (stmts)
818     {
819       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
820       gsi_commit_edge_inserts ();
821     }
822 
823   upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
824   if (stmts)
825     {
826       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
827       gsi_commit_edge_inserts ();
828     }
829 
830   gsi = gsi_last_bb (loop_header);
831   create_iv (initial_value, stride, iv, loop, &gsi, false,
832 	     iv_before, iv_after);
833 
834   /* Insert loop exit condition.  */
835   cond_expr = gimple_build_cond
836     (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
837 
838   exit_test = gimple_cond_lhs (cond_expr);
839   exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
840 					false, GSI_NEW_STMT);
841   gimple_cond_set_lhs (cond_expr, exit_test);
842   gsi = gsi_last_bb (exit_e->src);
843   gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
844 
845   split_block_after_labels (loop_header);
846 
847   return loop;
848 }
849 
850 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
851    latch to header and update loop tree and dominators
852    accordingly. Everything between them plus LATCH_EDGE destination must
853    be dominated by HEADER_EDGE destination, and back-reachable from
854    LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
855    FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
856    TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
857    Returns the newly created loop.  Frequencies and counts in the new loop
858    are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */
859 
860 class loop *
loopify(edge latch_edge,edge header_edge,basic_block switch_bb,edge true_edge,edge false_edge,bool redirect_all_edges,profile_probability true_scale,profile_probability false_scale)861 loopify (edge latch_edge, edge header_edge,
862 	 basic_block switch_bb, edge true_edge, edge false_edge,
863 	 bool redirect_all_edges, profile_probability true_scale,
864 	 profile_probability false_scale)
865 {
866   basic_block succ_bb = latch_edge->dest;
867   basic_block pred_bb = header_edge->src;
868   class loop *loop = alloc_loop ();
869   class loop *outer = loop_outer (succ_bb->loop_father);
870   profile_count cnt;
871 
872   loop->header = header_edge->dest;
873   loop->latch = latch_edge->src;
874 
875   cnt = header_edge->count ();
876 
877   /* Redirect edges.  */
878   loop_redirect_edge (latch_edge, loop->header);
879   loop_redirect_edge (true_edge, succ_bb);
880 
881   /* During loop versioning, one of the switch_bb edge is already properly
882      set. Do not redirect it again unless redirect_all_edges is true.  */
883   if (redirect_all_edges)
884     {
885       loop_redirect_edge (header_edge, switch_bb);
886       loop_redirect_edge (false_edge, loop->header);
887 
888       /* Update dominators.  */
889       set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
890       set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
891     }
892 
893   set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
894 
895   /* Compute new loop.  */
896   add_loop (loop, outer);
897 
898   /* Add switch_bb to appropriate loop.  */
899   if (switch_bb->loop_father)
900     remove_bb_from_loops (switch_bb);
901   add_bb_to_loop (switch_bb, outer);
902 
903   /* Fix counts.  */
904   if (redirect_all_edges)
905     {
906       switch_bb->count = cnt;
907     }
908   scale_loop_frequencies (loop, false_scale);
909   scale_loop_frequencies (succ_bb->loop_father, true_scale);
910   update_dominators_in_loop (loop);
911 
912   return loop;
913 }
914 
915 /* Remove the latch edge of a LOOP and update loops to indicate that
916    the LOOP was removed.  After this function, original loop latch will
917    have no successor, which caller is expected to fix somehow.
918 
919    If this may cause the information about irreducible regions to become
920    invalid, IRRED_INVALIDATED is set to true.
921 
922    LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
923    basic blocks that had non-trivial update on their loop_father.*/
924 
925 void
unloop(class loop * loop,bool * irred_invalidated,bitmap loop_closed_ssa_invalidated)926 unloop (class loop *loop, bool *irred_invalidated,
927 	bitmap loop_closed_ssa_invalidated)
928 {
929   basic_block *body;
930   class loop *ploop;
931   unsigned i, n;
932   basic_block latch = loop->latch;
933   bool dummy = false;
934 
935   if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
936     *irred_invalidated = true;
937 
938   /* This is relatively straightforward.  The dominators are unchanged, as
939      loop header dominates loop latch, so the only thing we have to care of
940      is the placement of loops and basic blocks inside the loop tree.  We
941      move them all to the loop->outer, and then let fix_bb_placements do
942      its work.  */
943 
944   body = get_loop_body (loop);
945   n = loop->num_nodes;
946   for (i = 0; i < n; i++)
947     if (body[i]->loop_father == loop)
948       {
949 	remove_bb_from_loops (body[i]);
950 	add_bb_to_loop (body[i], loop_outer (loop));
951       }
952   free (body);
953 
954   while (loop->inner)
955     {
956       ploop = loop->inner;
957       flow_loop_tree_node_remove (ploop);
958       flow_loop_tree_node_add (loop_outer (loop), ploop);
959     }
960 
961   /* Remove the loop and free its data.  */
962   delete_loop (loop);
963 
964   remove_edge (single_succ_edge (latch));
965 
966   /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
967      there is an irreducible region inside the cancelled loop, the flags will
968      be still correct.  */
969   fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
970 }
971 
972 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
973    condition stated in description of fix_loop_placement holds for them.
974    It is used in case when we removed some edges coming out of LOOP, which
975    may cause the right placement of LOOP inside loop tree to change.
976 
977    IRRED_INVALIDATED is set to true if a change in the loop structures might
978    invalidate the information about irreducible regions.  */
979 
980 static void
fix_loop_placements(class loop * loop,bool * irred_invalidated)981 fix_loop_placements (class loop *loop, bool *irred_invalidated)
982 {
983   class loop *outer;
984 
985   while (loop_outer (loop))
986     {
987       outer = loop_outer (loop);
988       if (!fix_loop_placement (loop, irred_invalidated))
989 	break;
990 
991       /* Changing the placement of a loop in the loop tree may alter the
992 	 validity of condition 2) of the description of fix_bb_placement
993 	 for its preheader, because the successor is the header and belongs
994 	 to the loop.  So call fix_bb_placements to fix up the placement
995 	 of the preheader and (possibly) of its predecessors.  */
996       fix_bb_placements (loop_preheader_edge (loop)->src,
997 			 irred_invalidated, NULL);
998       loop = outer;
999     }
1000 }
1001 
1002 /* Duplicate loop bounds and other information we store about
1003    the loop into its duplicate.  */
1004 
1005 void
copy_loop_info(class loop * loop,class loop * target)1006 copy_loop_info (class loop *loop, class loop *target)
1007 {
1008   gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1009   target->any_upper_bound = loop->any_upper_bound;
1010   target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1011   target->any_likely_upper_bound = loop->any_likely_upper_bound;
1012   target->nb_iterations_likely_upper_bound
1013     = loop->nb_iterations_likely_upper_bound;
1014   target->any_estimate = loop->any_estimate;
1015   target->nb_iterations_estimate = loop->nb_iterations_estimate;
1016   target->estimate_state = loop->estimate_state;
1017   target->safelen = loop->safelen;
1018   target->simdlen = loop->simdlen;
1019   target->constraints = loop->constraints;
1020   target->can_be_parallel = loop->can_be_parallel;
1021   target->warned_aggressive_loop_optimizations
1022     |= loop->warned_aggressive_loop_optimizations;
1023   target->dont_vectorize = loop->dont_vectorize;
1024   target->force_vectorize = loop->force_vectorize;
1025   target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1026   target->finite_p = loop->finite_p;
1027   target->unroll = loop->unroll;
1028   target->owned_clique = loop->owned_clique;
1029 }
1030 
1031 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1032    created loop into loops structure.  If AFTER is non-null
1033    the new loop is added at AFTER->next, otherwise in front of TARGETs
1034    sibling list.  */
1035 class loop *
duplicate_loop(class loop * loop,class loop * target,class loop * after)1036 duplicate_loop (class loop *loop, class loop *target, class loop *after)
1037 {
1038   class loop *cloop;
1039   cloop = alloc_loop ();
1040   place_new_loop (cfun, cloop);
1041 
1042   copy_loop_info (loop, cloop);
1043 
1044   /* Mark the new loop as copy of LOOP.  */
1045   set_loop_copy (loop, cloop);
1046 
1047   /* Add it to target.  */
1048   flow_loop_tree_node_add (target, cloop, after);
1049 
1050   return cloop;
1051 }
1052 
1053 /* Copies structure of subloops of LOOP into TARGET loop, placing
1054    newly created loops into loop tree at the end of TARGETs sibling
1055    list in the original order.  */
1056 void
duplicate_subloops(class loop * loop,class loop * target)1057 duplicate_subloops (class loop *loop, class loop *target)
1058 {
1059   class loop *aloop, *cloop, *tail;
1060 
1061   for (tail = target->inner; tail && tail->next; tail = tail->next)
1062     ;
1063   for (aloop = loop->inner; aloop; aloop = aloop->next)
1064     {
1065       cloop = duplicate_loop (aloop, target, tail);
1066       tail = cloop;
1067       gcc_assert(!tail->next);
1068       duplicate_subloops (aloop, cloop);
1069     }
1070 }
1071 
1072 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1073    into TARGET loop, placing newly created loops into loop tree adding
1074    them to TARGETs sibling list at the end in order.  */
1075 static void
copy_loops_to(class loop ** copied_loops,int n,class loop * target)1076 copy_loops_to (class loop **copied_loops, int n, class loop *target)
1077 {
1078   class loop *aloop, *tail;
1079   int i;
1080 
1081   for (tail = target->inner; tail && tail->next; tail = tail->next)
1082     ;
1083   for (i = 0; i < n; i++)
1084     {
1085       aloop = duplicate_loop (copied_loops[i], target, tail);
1086       tail = aloop;
1087       gcc_assert(!tail->next);
1088       duplicate_subloops (copied_loops[i], aloop);
1089     }
1090 }
1091 
1092 /* Redirects edge E to basic block DEST.  */
1093 static void
loop_redirect_edge(edge e,basic_block dest)1094 loop_redirect_edge (edge e, basic_block dest)
1095 {
1096   if (e->dest == dest)
1097     return;
1098 
1099   redirect_edge_and_branch_force (e, dest);
1100 }
1101 
1102 /* Check whether LOOP's body can be duplicated.  */
1103 bool
can_duplicate_loop_p(const class loop * loop)1104 can_duplicate_loop_p (const class loop *loop)
1105 {
1106   int ret;
1107   basic_block *bbs = get_loop_body (loop);
1108 
1109   ret = can_copy_bbs_p (bbs, loop->num_nodes);
1110   free (bbs);
1111 
1112   return ret;
1113 }
1114 
1115 /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
1116    loop structure and dominators (order of inner subloops is retained).
1117    E's destination must be LOOP header for this to work, i.e. it must be entry
1118    or latch edge of this loop; these are unique, as the loops must have
1119    preheaders for this function to work correctly (in case E is latch, the
1120    function unrolls the loop, if E is entry edge, it peels the loop).  Store
1121    edges created by copying ORIG edge from copies corresponding to set bits in
1122    WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1123    are numbered in order given by control flow through them) into TO_REMOVE
1124    array.  Returns false if duplication is
1125    impossible.  */
1126 
1127 bool
duplicate_loop_to_header_edge(class loop * loop,edge e,unsigned int ndupl,sbitmap wont_exit,edge orig,vec<edge> * to_remove,int flags)1128 duplicate_loop_to_header_edge (class loop *loop, edge e,
1129 			       unsigned int ndupl, sbitmap wont_exit,
1130 			       edge orig, vec<edge> *to_remove,
1131 			       int flags)
1132 {
1133   class loop *target, *aloop;
1134   class loop **orig_loops;
1135   unsigned n_orig_loops;
1136   basic_block header = loop->header, latch = loop->latch;
1137   basic_block *new_bbs, *bbs, *first_active;
1138   basic_block new_bb, bb, first_active_latch = NULL;
1139   edge ae, latch_edge;
1140   edge spec_edges[2], new_spec_edges[2];
1141   const int SE_LATCH = 0;
1142   const int SE_ORIG = 1;
1143   unsigned i, j, n;
1144   int is_latch = (latch == e->src);
1145   profile_probability *scale_step = NULL;
1146   profile_probability scale_main = profile_probability::always ();
1147   profile_probability scale_act = profile_probability::always ();
1148   profile_count after_exit_num = profile_count::zero (),
1149 	        after_exit_den = profile_count::zero ();
1150   bool scale_after_exit = false;
1151   int add_irreducible_flag;
1152   basic_block place_after;
1153   bitmap bbs_to_scale = NULL;
1154   bitmap_iterator bi;
1155 
1156   gcc_assert (e->dest == loop->header);
1157   gcc_assert (ndupl > 0);
1158 
1159   if (orig)
1160     {
1161       /* Orig must be edge out of the loop.  */
1162       gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1163       gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1164     }
1165 
1166   n = loop->num_nodes;
1167   bbs = get_loop_body_in_dom_order (loop);
1168   gcc_assert (bbs[0] == loop->header);
1169   gcc_assert (bbs[n  - 1] == loop->latch);
1170 
1171   /* Check whether duplication is possible.  */
1172   if (!can_copy_bbs_p (bbs, loop->num_nodes))
1173     {
1174       free (bbs);
1175       return false;
1176     }
1177   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1178 
1179   /* In case we are doing loop peeling and the loop is in the middle of
1180      irreducible region, the peeled copies will be inside it too.  */
1181   add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1182   gcc_assert (!is_latch || !add_irreducible_flag);
1183 
1184   /* Find edge from latch.  */
1185   latch_edge = loop_latch_edge (loop);
1186 
1187   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1188     {
1189       /* Calculate coefficients by that we have to scale counts
1190 	 of duplicated loop bodies.  */
1191       profile_count count_in = header->count;
1192       profile_count count_le = latch_edge->count ();
1193       profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1194       profile_probability prob_pass_thru = count_le.probability_in (count_in);
1195       profile_probability prob_pass_wont_exit =
1196 	      (count_le + count_out_orig).probability_in (count_in);
1197 
1198       if (orig && orig->probability.initialized_p ()
1199 	  && !(orig->probability == profile_probability::always ()))
1200 	{
1201 	  /* The blocks that are dominated by a removed exit edge ORIG have
1202 	     frequencies scaled by this.  */
1203 	  if (orig->count ().initialized_p ())
1204 	    {
1205 	      after_exit_num = orig->src->count;
1206 	      after_exit_den = after_exit_num - orig->count ();
1207 	      scale_after_exit = true;
1208 	    }
1209 	  bbs_to_scale = BITMAP_ALLOC (NULL);
1210 	  for (i = 0; i < n; i++)
1211 	    {
1212 	      if (bbs[i] != orig->src
1213 		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1214 		bitmap_set_bit (bbs_to_scale, i);
1215 	    }
1216 	}
1217 
1218       scale_step = XNEWVEC (profile_probability, ndupl);
1219 
1220       for (i = 1; i <= ndupl; i++)
1221 	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1222 				? prob_pass_wont_exit
1223 				: prob_pass_thru;
1224 
1225       /* Complete peeling is special as the probability of exit in last
1226 	 copy becomes 1.  */
1227       if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1228 	{
1229 	  profile_count wanted_count = e->count ();
1230 
1231 	  gcc_assert (!is_latch);
1232 	  /* First copy has count of incoming edge.  Each subsequent
1233 	     count should be reduced by prob_pass_wont_exit.  Caller
1234 	     should've managed the flags so all except for original loop
1235 	     has won't exist set.  */
1236 	  scale_act = wanted_count.probability_in (count_in);
1237 	  /* Now simulate the duplication adjustments and compute header
1238 	     frequency of the last copy.  */
1239 	  for (i = 0; i < ndupl; i++)
1240 	    wanted_count = wanted_count.apply_probability (scale_step [i]);
1241 	  scale_main = wanted_count.probability_in (count_in);
1242 	}
1243       /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1244 	 First iteration will be original loop followed by duplicated bodies.
1245 	 It is necessary to scale down the original so we get right overall
1246 	 number of iterations.  */
1247       else if (is_latch)
1248 	{
1249 	  profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1250 							? prob_pass_wont_exit
1251 							: prob_pass_thru;
1252 	  profile_probability p = prob_pass_main;
1253 	  profile_count scale_main_den = count_in;
1254 	  for (i = 0; i < ndupl; i++)
1255 	    {
1256 	      scale_main_den += count_in.apply_probability (p);
1257 	      p = p * scale_step[i];
1258 	    }
1259 	  /* If original loop is executed COUNT_IN times, the unrolled
1260 	     loop will account SCALE_MAIN_DEN times.  */
1261 	  scale_main = count_in.probability_in (scale_main_den);
1262 	  scale_act = scale_main * prob_pass_main;
1263 	}
1264       else
1265 	{
1266 	  profile_count preheader_count = e->count ();
1267 	  for (i = 0; i < ndupl; i++)
1268 	    scale_main = scale_main * scale_step[i];
1269 	  scale_act = preheader_count.probability_in (count_in);
1270 	}
1271     }
1272 
1273   /* Loop the new bbs will belong to.  */
1274   target = e->src->loop_father;
1275 
1276   /* Original loops.  */
1277   n_orig_loops = 0;
1278   for (aloop = loop->inner; aloop; aloop = aloop->next)
1279     n_orig_loops++;
1280   orig_loops = XNEWVEC (class loop *, n_orig_loops);
1281   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1282     orig_loops[i] = aloop;
1283 
1284   set_loop_copy (loop, target);
1285 
1286   first_active = XNEWVEC (basic_block, n);
1287   if (is_latch)
1288     {
1289       memcpy (first_active, bbs, n * sizeof (basic_block));
1290       first_active_latch = latch;
1291     }
1292 
1293   spec_edges[SE_ORIG] = orig;
1294   spec_edges[SE_LATCH] = latch_edge;
1295 
1296   place_after = e->src;
1297   for (j = 0; j < ndupl; j++)
1298     {
1299       /* Copy loops.  */
1300       copy_loops_to (orig_loops, n_orig_loops, target);
1301 
1302       /* Copy bbs.  */
1303       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1304 		place_after, true);
1305       place_after = new_spec_edges[SE_LATCH]->src;
1306 
1307       if (flags & DLTHE_RECORD_COPY_NUMBER)
1308 	for (i = 0; i < n; i++)
1309 	  {
1310 	    gcc_assert (!new_bbs[i]->aux);
1311 	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
1312 	  }
1313 
1314       /* Note whether the blocks and edges belong to an irreducible loop.  */
1315       if (add_irreducible_flag)
1316 	{
1317 	  for (i = 0; i < n; i++)
1318 	    new_bbs[i]->flags |= BB_DUPLICATED;
1319 	  for (i = 0; i < n; i++)
1320 	    {
1321 	      edge_iterator ei;
1322 	      new_bb = new_bbs[i];
1323 	      if (new_bb->loop_father == target)
1324 		new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1325 
1326 	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
1327 		if ((ae->dest->flags & BB_DUPLICATED)
1328 		    && (ae->src->loop_father == target
1329 			|| ae->dest->loop_father == target))
1330 		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1331 	    }
1332 	  for (i = 0; i < n; i++)
1333 	    new_bbs[i]->flags &= ~BB_DUPLICATED;
1334 	}
1335 
1336       /* Redirect the special edges.  */
1337       if (is_latch)
1338 	{
1339 	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1340 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1341 					  loop->header);
1342 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1343 	  latch = loop->latch = new_bbs[n - 1];
1344 	  e = latch_edge = new_spec_edges[SE_LATCH];
1345 	}
1346       else
1347 	{
1348 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1349 					  loop->header);
1350 	  redirect_edge_and_branch_force (e, new_bbs[0]);
1351 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1352 	  e = new_spec_edges[SE_LATCH];
1353 	}
1354 
1355       /* Record exit edge in this copy.  */
1356       if (orig && bitmap_bit_p (wont_exit, j + 1))
1357 	{
1358 	  if (to_remove)
1359 	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
1360 	  force_edge_cold (new_spec_edges[SE_ORIG], true);
1361 
1362 	  /* Scale the frequencies of the blocks dominated by the exit.  */
1363 	  if (bbs_to_scale && scale_after_exit)
1364 	    {
1365 	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1366 		scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1367 						     after_exit_den);
1368 	    }
1369 	}
1370 
1371       /* Record the first copy in the control flow order if it is not
1372 	 the original loop (i.e. in case of peeling).  */
1373       if (!first_active_latch)
1374 	{
1375 	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
1376 	  first_active_latch = new_bbs[n - 1];
1377 	}
1378 
1379       /* Set counts and frequencies.  */
1380       if (flags & DLTHE_FLAG_UPDATE_FREQ)
1381 	{
1382 	  scale_bbs_frequencies (new_bbs, n, scale_act);
1383 	  scale_act = scale_act * scale_step[j];
1384 	}
1385     }
1386   free (new_bbs);
1387   free (orig_loops);
1388 
1389   /* Record the exit edge in the original loop body, and update the frequencies.  */
1390   if (orig && bitmap_bit_p (wont_exit, 0))
1391     {
1392       if (to_remove)
1393 	to_remove->safe_push (orig);
1394       force_edge_cold (orig, true);
1395 
1396       /* Scale the frequencies of the blocks dominated by the exit.  */
1397       if (bbs_to_scale && scale_after_exit)
1398 	{
1399 	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1400 	    scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1401 						 after_exit_den);
1402 	}
1403     }
1404 
1405   /* Update the original loop.  */
1406   if (!is_latch)
1407     set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1408   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1409     {
1410       scale_bbs_frequencies (bbs, n, scale_main);
1411       free (scale_step);
1412     }
1413 
1414   /* Update dominators of outer blocks if affected.  */
1415   for (i = 0; i < n; i++)
1416     {
1417       basic_block dominated, dom_bb;
1418       vec<basic_block> dom_bbs;
1419       unsigned j;
1420 
1421       bb = bbs[i];
1422       bb->aux = 0;
1423 
1424       dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1425       FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1426 	{
1427 	  if (flow_bb_inside_loop_p (loop, dominated))
1428 	    continue;
1429 	  dom_bb = nearest_common_dominator (
1430 			CDI_DOMINATORS, first_active[i], first_active_latch);
1431 	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1432 	}
1433       dom_bbs.release ();
1434     }
1435   free (first_active);
1436 
1437   free (bbs);
1438   BITMAP_FREE (bbs_to_scale);
1439 
1440   return true;
1441 }
1442 
1443 /* A callback for make_forwarder block, to redirect all edges except for
1444    MFB_KJ_EDGE to the entry part.  E is the edge for that we should decide
1445    whether to redirect it.  */
1446 
1447 edge mfb_kj_edge;
1448 bool
mfb_keep_just(edge e)1449 mfb_keep_just (edge e)
1450 {
1451   return e != mfb_kj_edge;
1452 }
1453 
1454 /* True when a candidate preheader BLOCK has predecessors from LOOP.  */
1455 
1456 static bool
has_preds_from_loop(basic_block block,class loop * loop)1457 has_preds_from_loop (basic_block block, class loop *loop)
1458 {
1459   edge e;
1460   edge_iterator ei;
1461 
1462   FOR_EACH_EDGE (e, ei, block->preds)
1463     if (e->src->loop_father == loop)
1464       return true;
1465   return false;
1466 }
1467 
1468 /* Creates a pre-header for a LOOP.  Returns newly created block.  Unless
1469    CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1470    entry; otherwise we also force preheader block to have only one successor.
1471    When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1472    to be a fallthru predecessor to the loop header and to have only
1473    predecessors from outside of the loop.
1474    The function also updates dominators.  */
1475 
1476 basic_block
create_preheader(class loop * loop,int flags)1477 create_preheader (class loop *loop, int flags)
1478 {
1479   edge e;
1480   basic_block dummy;
1481   int nentry = 0;
1482   bool irred = false;
1483   bool latch_edge_was_fallthru;
1484   edge one_succ_pred = NULL, single_entry = NULL;
1485   edge_iterator ei;
1486 
1487   FOR_EACH_EDGE (e, ei, loop->header->preds)
1488     {
1489       if (e->src == loop->latch)
1490 	continue;
1491       irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1492       nentry++;
1493       single_entry = e;
1494       if (single_succ_p (e->src))
1495 	one_succ_pred = e;
1496     }
1497   gcc_assert (nentry);
1498   if (nentry == 1)
1499     {
1500       bool need_forwarder_block = false;
1501 
1502       /* We do not allow entry block to be the loop preheader, since we
1503 	     cannot emit code there.  */
1504       if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1505         need_forwarder_block = true;
1506       else
1507         {
1508           /* If we want simple preheaders, also force the preheader to have
1509 	     just a single successor and a normal edge.  */
1510           if ((flags & CP_SIMPLE_PREHEADERS)
1511 	      && ((single_entry->flags & EDGE_COMPLEX)
1512 		  || !single_succ_p (single_entry->src)))
1513             need_forwarder_block = true;
1514           /* If we want fallthru preheaders, also create forwarder block when
1515              preheader ends with a jump or has predecessors from loop.  */
1516           else if ((flags & CP_FALLTHRU_PREHEADERS)
1517                    && (JUMP_P (BB_END (single_entry->src))
1518                        || has_preds_from_loop (single_entry->src, loop)))
1519             need_forwarder_block = true;
1520         }
1521       if (! need_forwarder_block)
1522 	return NULL;
1523     }
1524 
1525   mfb_kj_edge = loop_latch_edge (loop);
1526   latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1527   if (nentry == 1
1528       && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1529   	  || (single_entry->flags & EDGE_CROSSING) == 0))
1530     dummy = split_edge (single_entry);
1531   else
1532     {
1533       edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1534       dummy = fallthru->src;
1535       loop->header = fallthru->dest;
1536     }
1537 
1538   /* Try to be clever in placing the newly created preheader.  The idea is to
1539      avoid breaking any "fallthruness" relationship between blocks.
1540 
1541      The preheader was created just before the header and all incoming edges
1542      to the header were redirected to the preheader, except the latch edge.
1543      So the only problematic case is when this latch edge was a fallthru
1544      edge: it is not anymore after the preheader creation so we have broken
1545      the fallthruness.  We're therefore going to look for a better place.  */
1546   if (latch_edge_was_fallthru)
1547     {
1548       if (one_succ_pred)
1549 	e = one_succ_pred;
1550       else
1551 	e = EDGE_PRED (dummy, 0);
1552 
1553       move_block_after (dummy, e->src);
1554     }
1555 
1556   if (irred)
1557     {
1558       dummy->flags |= BB_IRREDUCIBLE_LOOP;
1559       single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1560     }
1561 
1562   if (dump_file)
1563     fprintf (dump_file, "Created preheader block for loop %i\n",
1564 	     loop->num);
1565 
1566   if (flags & CP_FALLTHRU_PREHEADERS)
1567     gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1568                 && !JUMP_P (BB_END (dummy)));
1569 
1570   return dummy;
1571 }
1572 
1573 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader.  */
1574 
1575 void
create_preheaders(int flags)1576 create_preheaders (int flags)
1577 {
1578   class loop *loop;
1579 
1580   if (!current_loops)
1581     return;
1582 
1583   FOR_EACH_LOOP (loop, 0)
1584     create_preheader (loop, flags);
1585   loops_state_set (LOOPS_HAVE_PREHEADERS);
1586 }
1587 
1588 /* Forces all loop latches to have only single successor.  */
1589 
1590 void
force_single_succ_latches(void)1591 force_single_succ_latches (void)
1592 {
1593   class loop *loop;
1594   edge e;
1595 
1596   FOR_EACH_LOOP (loop, 0)
1597     {
1598       if (loop->latch != loop->header && single_succ_p (loop->latch))
1599 	continue;
1600 
1601       e = find_edge (loop->latch, loop->header);
1602       gcc_checking_assert (e != NULL);
1603 
1604       split_edge (e);
1605     }
1606   loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1607 }
1608 
1609 /* This function is called from loop_version.  It splits the entry edge
1610    of the loop we want to version, adds the versioning condition, and
1611    adjust the edges to the two versions of the loop appropriately.
1612    e is an incoming edge. Returns the basic block containing the
1613    condition.
1614 
1615    --- edge e ---- > [second_head]
1616 
1617    Split it and insert new conditional expression and adjust edges.
1618 
1619     --- edge e ---> [cond expr] ---> [first_head]
1620 			|
1621 			+---------> [second_head]
1622 
1623   THEN_PROB is the probability of then branch of the condition.
1624   ELSE_PROB is the probability of else branch. Note that they may be both
1625   REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1626   IFN_LOOP_DIST_ALIAS.  */
1627 
1628 static basic_block
lv_adjust_loop_entry_edge(basic_block first_head,basic_block second_head,edge e,void * cond_expr,profile_probability then_prob,profile_probability else_prob)1629 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1630 			   edge e, void *cond_expr,
1631 			   profile_probability then_prob,
1632 			   profile_probability else_prob)
1633 {
1634   basic_block new_head = NULL;
1635   edge e1;
1636 
1637   gcc_assert (e->dest == second_head);
1638 
1639   /* Split edge 'e'. This will create a new basic block, where we can
1640      insert conditional expr.  */
1641   new_head = split_edge (e);
1642 
1643   lv_add_condition_to_bb (first_head, second_head, new_head,
1644 			  cond_expr);
1645 
1646   /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
1647   e = single_succ_edge (new_head);
1648   e1 = make_edge (new_head, first_head,
1649 		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1650   e1->probability = then_prob;
1651   e->probability = else_prob;
1652 
1653   set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1654   set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1655 
1656   /* Adjust loop header phi nodes.  */
1657   lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1658 
1659   return new_head;
1660 }
1661 
1662 /* Main entry point for Loop Versioning transformation.
1663 
1664    This transformation given a condition and a loop, creates
1665    -if (condition) { loop_copy1 } else { loop_copy2 },
1666    where loop_copy1 is the loop transformed in one way, and loop_copy2
1667    is the loop transformed in another way (or unchanged). COND_EXPR
1668    may be a run time test for things that were not resolved by static
1669    analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1670 
1671    If non-NULL, CONDITION_BB is set to the basic block containing the
1672    condition.
1673 
1674    THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
1675    is the ratio by that the frequencies in the original loop should
1676    be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
1677    new loop should be scaled.
1678 
1679    If PLACE_AFTER is true, we place the new loop after LOOP in the
1680    instruction stream, otherwise it is placed before LOOP.  */
1681 
1682 class loop *
loop_version(class loop * loop,void * cond_expr,basic_block * condition_bb,profile_probability then_prob,profile_probability else_prob,profile_probability then_scale,profile_probability else_scale,bool place_after)1683 loop_version (class loop *loop,
1684 	      void *cond_expr, basic_block *condition_bb,
1685 	      profile_probability then_prob, profile_probability else_prob,
1686 	      profile_probability then_scale, profile_probability else_scale,
1687 	      bool place_after)
1688 {
1689   basic_block first_head, second_head;
1690   edge entry, latch_edge, true_edge, false_edge;
1691   int irred_flag;
1692   class loop *nloop;
1693   basic_block cond_bb;
1694 
1695   /* Record entry and latch edges for the loop */
1696   entry = loop_preheader_edge (loop);
1697   irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1698   entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1699 
1700   /* Note down head of loop as first_head.  */
1701   first_head = entry->dest;
1702 
1703   /* Duplicate loop.  */
1704   if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1705 					       NULL, NULL, NULL, 0))
1706     {
1707       entry->flags |= irred_flag;
1708       return NULL;
1709     }
1710 
1711   /* After duplication entry edge now points to new loop head block.
1712      Note down new head as second_head.  */
1713   second_head = entry->dest;
1714 
1715   /* Split loop entry edge and insert new block with cond expr.  */
1716   cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
1717 					entry, cond_expr, then_prob, else_prob);
1718   if (condition_bb)
1719     *condition_bb = cond_bb;
1720 
1721   if (!cond_bb)
1722     {
1723       entry->flags |= irred_flag;
1724       return NULL;
1725     }
1726 
1727   latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1728 
1729   extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1730   nloop = loopify (latch_edge,
1731 		   single_pred_edge (get_bb_copy (loop->header)),
1732 		   cond_bb, true_edge, false_edge,
1733 		   false /* Do not redirect all edges.  */,
1734 		   then_scale, else_scale);
1735 
1736   copy_loop_info (loop, nloop);
1737 
1738   /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
1739   lv_flush_pending_stmts (latch_edge);
1740 
1741   /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS.  */
1742   extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1743   lv_flush_pending_stmts (false_edge);
1744   /* Adjust irreducible flag.  */
1745   if (irred_flag)
1746     {
1747       cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1748       loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1749       loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1750       single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1751     }
1752 
1753   if (place_after)
1754     {
1755       basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1756       unsigned i;
1757 
1758       after = loop->latch;
1759 
1760       for (i = 0; i < nloop->num_nodes; i++)
1761 	{
1762 	  move_block_after (bbs[i], after);
1763 	  after = bbs[i];
1764 	}
1765       free (bbs);
1766     }
1767 
1768   /* At this point condition_bb is loop preheader with two successors,
1769      first_head and second_head.   Make sure that loop preheader has only
1770      one successor.  */
1771   split_edge (loop_preheader_edge (loop));
1772   split_edge (loop_preheader_edge (nloop));
1773 
1774   return nloop;
1775 }
1776