1 /* Loop manipulation code for GNU compiler.
2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "basic-block.h"
26 #include "cfgloop.h"
27 #include "tree-flow.h"
28 #include "dumpfile.h"
29 
30 static void copy_loops_to (struct loop **, int,
31 			   struct loop *);
32 static void loop_redirect_edge (edge, basic_block);
33 static void remove_bbs (basic_block *, int);
34 static bool rpe_enum_p (const_basic_block, const void *);
35 static int find_path (edge, basic_block **);
36 static void fix_loop_placements (struct loop *, bool *);
37 static bool fix_bb_placement (basic_block);
38 static void fix_bb_placements (basic_block, bool *, bitmap);
39 
40 /* Checks whether basic block BB is dominated by DATA.  */
41 static bool
rpe_enum_p (const_basic_block bb, const void *data)
43 {
44   return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
45 }
46 
47 /* Remove basic blocks BBS.  NBBS is the number of the basic blocks.  */
48 
49 static void
remove_bbs (basic_block *bbs, int nbbs)
51 {
52   int i;
53 
54   for (i = 0; i < nbbs; i++)
55     delete_basic_block (bbs[i]);
56 }
57 
/* Find the path -- i.e. the basic blocks dominated by edge E -- and put them
   into array BBS, which is allocated large enough to contain them.
   E->dest must have exactly one predecessor for this to work (this is
   easy to achieve and we do not enforce it here because we do not want to
   alter anything by this function).  The number of basic blocks in the
   path is returned.  */
64 static int
find_path (edge e, basic_block **bbs)
66 {
67   gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
68 
69   /* Find bbs in the path.  */
70   *bbs = XNEWVEC (basic_block, n_basic_blocks);
71   return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
72 			     n_basic_blocks, e->dest);
73 }
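
/* A minimal sketch of how the two helpers above combine (the edge E and the
   caller are hypothetical; remove_path below is the real, complete user):

     basic_block *path_bbs;
     int n = find_path (e, &path_bbs);   -- E->dest must have a single pred
     remove_bbs (path_bbs, n);           -- delete the dominated blocks
     free (path_bbs);
*/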
74 
/* Fix the placement of basic block BB inside the loop hierarchy --
   let L be the loop to which BB belongs.  Then every successor of BB must
   either
     1) belong to some superloop of loop L, or
     2) be the header of a loop K such that K->outer is a superloop of L.
   Returns true if we had to move BB into another loop to enforce this
   condition, false if the placement of BB was already correct (provided that
   placements of its successors are correct).  */
82 static bool
fix_bb_placement (basic_block bb)
84 {
85   edge e;
86   edge_iterator ei;
87   struct loop *loop = current_loops->tree_root, *act;
88 
89   FOR_EACH_EDGE (e, ei, bb->succs)
90     {
91       if (e->dest == EXIT_BLOCK_PTR)
92 	continue;
93 
94       act = e->dest->loop_father;
95       if (act->header == e->dest)
96 	act = loop_outer (act);
97 
98       if (flow_loop_nested_p (loop, act))
99 	loop = act;
100     }
101 
102   if (loop == bb->loop_father)
103     return false;
104 
105   remove_bb_from_loops (bb);
106   add_bb_to_loop (bb, loop);
107 
108   return true;
109 }
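
/* For example, if an edge removal leaves every remaining successor of a block
   in the immediate superloop of the block's current loop, fix_bb_placement
   moves the block up into that superloop; fix_bb_placements below iterates
   this over the affected predecessors.  */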
110 
/* Fix the placement of LOOP inside the loop tree, i.e. find the innermost
   superloop of LOOP to which at least one exit edge of LOOP leads, and set it
   as the immediate superloop of LOOP.  Return true if the immediate superloop
   of LOOP changed.

   IRRED_INVALIDATED is set to true if a change in the loop structures might
   invalidate the information about irreducible regions.  */
118 
119 static bool
fix_loop_placement (struct loop *loop, bool *irred_invalidated)
121 {
122   unsigned i;
123   edge e;
124   vec<edge> exits = get_loop_exit_edges (loop);
125   struct loop *father = current_loops->tree_root, *act;
126   bool ret = false;
127 
128   FOR_EACH_VEC_ELT (exits, i, e)
129     {
130       act = find_common_loop (loop, e->dest->loop_father);
131       if (flow_loop_nested_p (father, act))
132 	father = act;
133     }
134 
135   if (father != loop_outer (loop))
136     {
137       for (act = loop_outer (loop); act != father; act = loop_outer (act))
138 	act->num_nodes -= loop->num_nodes;
139       flow_loop_tree_node_remove (loop);
140       flow_loop_tree_node_add (father, loop);
141 
      /* The exit edges of LOOP no longer exit its original immediate
	 superloops; remove them from the appropriate exit lists.  */
144       FOR_EACH_VEC_ELT (exits, i, e)
145 	{
146 	  /* We may need to recompute irreducible loops.  */
147 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
148 	    *irred_invalidated = true;
149 	  rescan_loop_exit (e, false, false);
150 	}
151 
152       ret = true;
153     }
154 
155   exits.release ();
156   return ret;
157 }
158 
/* Fix the placements of basic blocks inside the loop hierarchy, i.e.
   enforce the condition stated in the description of fix_bb_placement.  We
   start from basic block FROM that had some of its successors removed, so
   that its placement no longer has to be correct, and iteratively fix the
   placement of its predecessors that may change if the placement of FROM
   changed.  Also fix the placement of subloops of FROM->loop_father, which
   might also be altered due to this change; the condition for them is
   similar, except that instead of successors we consider edges coming out
   of the loops.

   If the changes may invalidate the information about irreducible regions,
   IRRED_INVALIDATED is set to true.

   If LOOP_CLOSED_SSA_INVALIDATED is non-NULL then all basic blocks with
   a changed loop_father are collected there.  */
173 
174 static void
fix_bb_placements (basic_block from,
		   bool *irred_invalidated,
		   bitmap loop_closed_ssa_invalidated)
178 {
179   sbitmap in_queue;
180   basic_block *queue, *qtop, *qbeg, *qend;
181   struct loop *base_loop, *target_loop;
182   edge e;
183 
  /* We pass through blocks back-reachable from FROM, testing whether some
     of their successors moved to an outer loop.  It may be necessary to
     iterate several times, but the process is finite, as we stop unless we
     move a basic block up the loop structure.  The whole story is a bit
     more complicated due to the presence of subloops, which are moved using
     fix_loop_placement.  */
190 
191   base_loop = from->loop_father;
192   /* If we are already in the outermost loop, the basic blocks cannot be moved
193      outside of it.  If FROM is the header of the base loop, it cannot be moved
194      outside of it, either.  In both cases, we can end now.  */
195   if (base_loop == current_loops->tree_root
196       || from == base_loop->header)
197     return;
198 
199   in_queue = sbitmap_alloc (last_basic_block);
200   bitmap_clear (in_queue);
201   bitmap_set_bit (in_queue, from->index);
202   /* Prevent us from going out of the base_loop.  */
203   bitmap_set_bit (in_queue, base_loop->header->index);
204 
205   queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
206   qtop = queue + base_loop->num_nodes + 1;
207   qbeg = queue;
208   qend = queue + 1;
209   *qbeg = from;
210 
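  /* QUEUE is used as a circular buffer of capacity base_loop->num_nodes + 1:
     QBEG and QEND wrap back to QUEUE when they reach QTOP, and the IN_QUEUE
     sbitmap prevents a block from being scheduled twice.  */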
211   while (qbeg != qend)
212     {
213       edge_iterator ei;
214       from = *qbeg;
215       qbeg++;
216       if (qbeg == qtop)
217 	qbeg = queue;
218       bitmap_clear_bit (in_queue, from->index);
219 
220       if (from->loop_father->header == from)
221 	{
222 	  /* Subloop header, maybe move the loop upward.  */
223 	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
224 	    continue;
225 	  target_loop = loop_outer (from->loop_father);
226 	}
227       else
228 	{
229 	  /* Ordinary basic block.  */
230 	  if (!fix_bb_placement (from))
231 	    continue;
232 	  if (loop_closed_ssa_invalidated)
233 	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
234 	  target_loop = from->loop_father;
235 	}
236 
237       FOR_EACH_EDGE (e, ei, from->succs)
238 	{
239 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
240 	    *irred_invalidated = true;
241 	}
242 
243       /* Something has changed, insert predecessors into queue.  */
244       FOR_EACH_EDGE (e, ei, from->preds)
245 	{
246 	  basic_block pred = e->src;
247 	  struct loop *nca;
248 
249 	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
250 	    *irred_invalidated = true;
251 
252 	  if (bitmap_bit_p (in_queue, pred->index))
253 	    continue;
254 
	  /* If it is a subloop, then either it was not moved, or
	     the path up the loop tree from base_loop does not contain
	     it.  */
258 	  nca = find_common_loop (pred->loop_father, base_loop);
259 	  if (pred->loop_father != base_loop
260 	      && (nca == base_loop
261 		  || nca != pred->loop_father))
262 	    pred = pred->loop_father->header;
263 	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
264 	    {
	      /* If PRED is already higher in the loop hierarchy than the
		 TARGET_LOOP to which we moved FROM, the change of the position
		 of FROM does not affect the position of PRED, so there is no
		 point in processing it.  */
269 	      continue;
270 	    }
271 
272 	  if (bitmap_bit_p (in_queue, pred->index))
273 	    continue;
274 
275 	  /* Schedule the basic block.  */
276 	  *qend = pred;
277 	  qend++;
278 	  if (qend == qtop)
279 	    qend = queue;
280 	  bitmap_set_bit (in_queue, pred->index);
281 	}
282     }
283   free (in_queue);
284   free (queue);
285 }
286 
/* Remove the path beginning at edge E, i.e. remove the basic blocks dominated
   by E and update the loop structures and dominators.  Return true if we
   were able to remove the path, false otherwise (in which case nothing is
   changed).  */
290 bool
remove_path (edge e)
292 {
293   edge ae;
294   basic_block *rem_bbs, *bord_bbs, from, bb;
295   vec<basic_block> dom_bbs;
296   int i, nrem, n_bord_bbs;
297   sbitmap seen;
298   bool irred_invalidated = false;
299   edge_iterator ei;
300   struct loop *l, *f;
301 
302   if (!can_remove_branch_p (e))
303     return false;
304 
305   /* Keep track of whether we need to update information about irreducible
306      regions.  This is the case if the removed area is a part of the
307      irreducible region, or if the set of basic blocks that belong to a loop
308      that is inside an irreducible region is changed, or if such a loop is
309      removed.  */
310   if (e->flags & EDGE_IRREDUCIBLE_LOOP)
311     irred_invalidated = true;
312 
  /* We need to check whether basic blocks are dominated by the edge
     e, but we only have basic block dominators.  This is easy to
     fix -- when e->dest has exactly one predecessor, this corresponds
     to the blocks dominated by e->dest; if not, split the edge.  */
317   if (!single_pred_p (e->dest))
318     e = single_pred_edge (split_edge (e));
319 
  /* It may happen that by removing the path we remove one or more loops
     that we belong to.  In this case first unloop the loops, then proceed
     normally.  We may assume that e->dest is not a header of any loop,
     as it now has exactly one predecessor.  */
324   for (l = e->src->loop_father; loop_outer (l); l = f)
325     {
326       f = loop_outer (l);
327       if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
328         unloop (l, &irred_invalidated, NULL);
329     }
330 
331   /* Identify the path.  */
332   nrem = find_path (e, &rem_bbs);
333 
334   n_bord_bbs = 0;
335   bord_bbs = XNEWVEC (basic_block, n_basic_blocks);
336   seen = sbitmap_alloc (last_basic_block);
337   bitmap_clear (seen);
338 
  /* Find "border" blocks -- i.e. those with a predecessor in the removed
     path.  */
340   for (i = 0; i < nrem; i++)
341     bitmap_set_bit (seen, rem_bbs[i]->index);
342   if (!irred_invalidated)
343     FOR_EACH_EDGE (ae, ei, e->src->succs)
344       if (ae != e && ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index)
345 	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
346 	irred_invalidated = true;
347   for (i = 0; i < nrem; i++)
348     {
349       bb = rem_bbs[i];
350       FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
351 	if (ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index))
352 	  {
353 	    bitmap_set_bit (seen, ae->dest->index);
354 	    bord_bbs[n_bord_bbs++] = ae->dest;
355 
356 	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
357 	      irred_invalidated = true;
358 	  }
359     }
360 
361   /* Remove the path.  */
362   from = e->src;
363   remove_branch (e);
364   dom_bbs.create (0);
365 
366   /* Cancel loops contained in the path.  */
367   for (i = 0; i < nrem; i++)
368     if (rem_bbs[i]->loop_father->header == rem_bbs[i])
369       cancel_loop_tree (rem_bbs[i]->loop_father);
370 
371   remove_bbs (rem_bbs, nrem);
372   free (rem_bbs);
373 
374   /* Find blocks whose dominators may be affected.  */
375   bitmap_clear (seen);
376   for (i = 0; i < n_bord_bbs; i++)
377     {
378       basic_block ldom;
379 
380       bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
381       if (bitmap_bit_p (seen, bb->index))
382 	continue;
383       bitmap_set_bit (seen, bb->index);
384 
385       for (ldom = first_dom_son (CDI_DOMINATORS, bb);
386 	   ldom;
387 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
388 	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
389 	  dom_bbs.safe_push (ldom);
390     }
391 
392   free (seen);
393 
394   /* Recount dominators.  */
395   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
396   dom_bbs.release ();
397   free (bord_bbs);
398 
399   /* Fix placements of basic blocks inside loops and the placement of
400      loops in the loop tree.  */
401   fix_bb_placements (from, &irred_invalidated, NULL);
402   fix_loop_placements (from->loop_father, &irred_invalidated);
403 
404   if (irred_invalidated
405       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
406     mark_irreducible_loops ();
407 
408   return true;
409 }
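
/* An illustrative caller sketch (EXIT_E is a hypothetical edge that has
   become unreachable, e.g. after its controlling condition was folded away):

     if (remove_path (exit_e))
       cfg_altered = true;

   On success the blocks dominated by EXIT_E are gone and the loop tree,
   dominators and (when maintained) irreducible-region marks are up to date;
   on failure nothing is changed.  */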
410 
411 /* Creates place for a new LOOP in loops structure.  */
412 
413 static void
place_new_loop (struct loop *loop)
415 {
416   loop->num = number_of_loops ();
417   vec_safe_push (current_loops->larray, loop);
418 }
419 
/* Given a LOOP structure with filled header and latch, find the body of the
   corresponding loop and add it to the loop tree.  Insert the LOOP as a son
   of OUTER.  */
423 
424 void
add_loop (struct loop *loop, struct loop *outer)
426 {
427   basic_block *bbs;
428   int i, n;
429   struct loop *subloop;
430   edge e;
431   edge_iterator ei;
432 
433   /* Add it to loop structure.  */
434   place_new_loop (loop);
435   flow_loop_tree_node_add (outer, loop);
436 
437   /* Find its nodes.  */
438   bbs = XNEWVEC (basic_block, n_basic_blocks);
439   n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
440 
441   for (i = 0; i < n; i++)
442     {
443       if (bbs[i]->loop_father == outer)
444 	{
445 	  remove_bb_from_loops (bbs[i]);
446 	  add_bb_to_loop (bbs[i], loop);
447 	  continue;
448 	}
449 
450       loop->num_nodes++;
451 
452       /* If we find a direct subloop of OUTER, move it to LOOP.  */
453       subloop = bbs[i]->loop_father;
454       if (loop_outer (subloop) == outer
455 	  && subloop->header == bbs[i])
456 	{
457 	  flow_loop_tree_node_remove (subloop);
458 	  flow_loop_tree_node_add (loop, subloop);
459 	}
460     }
461 
462   /* Update the information about loop exit edges.  */
463   for (i = 0; i < n; i++)
464     {
465       FOR_EACH_EDGE (e, ei, bbs[i]->succs)
466 	{
467 	  rescan_loop_exit (e, false, false);
468 	}
469     }
470 
471   free (bbs);
472 }
473 
474 /* Multiply all frequencies in LOOP by NUM/DEN.  */
475 
476 void
scale_loop_frequencies (struct loop *loop, int num, int den)
478 {
479   basic_block *bbs;
480 
481   bbs = get_loop_body (loop);
482   scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
483   free (bbs);
484 }
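
/* For instance, halving all frequencies in LOOP amounts to

     scale_loop_frequencies (loop, REG_BR_PROB_BASE / 2, REG_BR_PROB_BASE);

   which is the pattern used by create_empty_loop_on_edge below.  */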
485 
486 /* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
   If ITERATION_BOUND is non-zero, scale even further if the loop is predicted
   to iterate too many times.  */
489 
490 void
scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound)
492 {
493   gcov_type iterations = expected_loop_iterations_unbounded (loop);
494   edge e;
495   edge_iterator ei;
496 
497   if (dump_file && (dump_flags & TDF_DETAILS))
498     fprintf (dump_file, ";; Scaling loop %i with scale %f, "
499 	     "bounding iterations to %i from guessed %i\n",
500 	     loop->num, (double)scale / REG_BR_PROB_BASE,
501 	     (int)iteration_bound, (int)iterations);
502 
503   /* See if loop is predicted to iterate too many times.  */
504   if (iteration_bound && iterations > 0
505       && RDIV (iterations * scale, REG_BR_PROB_BASE) > iteration_bound)
506     {
      /* Fixing the loop profile for a different trip count is not trivial;
	 the exit probabilities have to be updated to match and the
	 frequencies propagated down to the loop body.

	 We fully update only the simple case of a loop with a single exit
	 that is either from the latch or from the BB just before the latch,
	 and that leads from a BB with a simple conditional jump.  This is OK
	 for use in the vectorizer.  */
514       e = single_exit (loop);
515       if (e)
516 	{
517 	  edge other_e;
518 	  int freq_delta;
519 	  gcov_type count_delta;
520 
521           FOR_EACH_EDGE (other_e, ei, e->src->succs)
522 	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
523 		&& e != other_e)
524 	      break;
525 
526 	  /* Probability of exit must be 1/iterations.  */
527 	  freq_delta = EDGE_FREQUENCY (e);
528 	  e->probability = REG_BR_PROB_BASE / iteration_bound;
529 	  other_e->probability = inverse_probability (e->probability);
530 	  freq_delta -= EDGE_FREQUENCY (e);
531 
532 	  /* Adjust counts accordingly.  */
533 	  count_delta = e->count;
534 	  e->count = apply_probability (e->src->count, e->probability);
535 	  other_e->count = apply_probability (e->src->count, other_e->probability);
536 	  count_delta -= e->count;
537 
538 	  /* If latch exists, change its frequency and count, since we changed
539 	     probability of exit.  Theoretically we should update everything from
540 	     source of exit edge to latch, but for vectorizer this is enough.  */
541 	  if (loop->latch
542 	      && loop->latch != e->src)
543 	    {
544 	      loop->latch->frequency += freq_delta;
545 	      if (loop->latch->frequency < 0)
546 		loop->latch->frequency = 0;
547 	      loop->latch->count += count_delta;
548 	      if (loop->latch->count < 0)
549 		loop->latch->count = 0;
550 	    }
551 	}
552 
      /* Roughly speaking we want to reduce the loop body profile by the
	 difference of the loop iterations.  We can however do better if
	 we look at the actual profile, if it is available.  */
556       scale = RDIV (iteration_bound * scale, iterations);
557       if (loop->header->count)
558 	{
559 	  gcov_type count_in = 0;
560 
561 	  FOR_EACH_EDGE (e, ei, loop->header->preds)
562 	    if (e->src != loop->latch)
563 	      count_in += e->count;
564 
565 	  if (count_in != 0)
566 	    scale = RDIV (count_in * iteration_bound * REG_BR_PROB_BASE, loop->header->count);
567 	}
568       else if (loop->header->frequency)
569 	{
570 	  int freq_in = 0;
571 
572 	  FOR_EACH_EDGE (e, ei, loop->header->preds)
573 	    if (e->src != loop->latch)
574 	      freq_in += EDGE_FREQUENCY (e);
575 
576 	  if (freq_in != 0)
577 	    scale = RDIV (freq_in * iteration_bound * REG_BR_PROB_BASE, loop->header->frequency);
578 	}
579       if (!scale)
580 	scale = 1;
581     }
582 
583   if (scale == REG_BR_PROB_BASE)
584     return;
585 
586   /* Scale the actual probabilities.  */
587   scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
588   if (dump_file && (dump_flags & TDF_DETAILS))
589     fprintf (dump_file, ";; guessed iterations are now %i\n",
590 	     (int)expected_loop_iterations_unbounded (loop));
591 }
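
/* A hypothetical caller that knows LOOP will now run at most four iterations
   and wants its body weighted at a quarter of the original profile might use

     scale_loop_profile (loop, REG_BR_PROB_BASE / 4, 4);

   where the scale is relative to REG_BR_PROB_BASE as described above.  */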
592 
593 /* Recompute dominance information for basic blocks outside LOOP.  */
594 
595 static void
update_dominators_in_loop (struct loop *loop)
597 {
598   vec<basic_block> dom_bbs = vNULL;
599   sbitmap seen;
600   basic_block *body;
601   unsigned i;
602 
603   seen = sbitmap_alloc (last_basic_block);
604   bitmap_clear (seen);
605   body = get_loop_body (loop);
606 
607   for (i = 0; i < loop->num_nodes; i++)
608     bitmap_set_bit (seen, body[i]->index);
609 
610   for (i = 0; i < loop->num_nodes; i++)
611     {
612       basic_block ldom;
613 
614       for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
615 	   ldom;
616 	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
617 	if (!bitmap_bit_p (seen, ldom->index))
618 	  {
619 	    bitmap_set_bit (seen, ldom->index);
620 	    dom_bbs.safe_push (ldom);
621 	  }
622     }
623 
624   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
625   free (body);
626   free (seen);
627   dom_bbs.release ();
628 }
629 
/* Creates an if region as shown below.  CONDITION is used to create
   the test for the if.
632 
633    |
634    |     -------------                 -------------
635    |     |  pred_bb  |                 |  pred_bb  |
636    |     -------------                 -------------
637    |           |                             |
638    |           |                             | ENTRY_EDGE
639    |           | ENTRY_EDGE                  V
640    |           |             ====>     -------------
641    |           |                       |  cond_bb  |
642    |           |                       | CONDITION |
643    |           |                       -------------
644    |           V                        /         \
645    |     -------------         e_false /           \ e_true
646    |     |  succ_bb  |                V             V
647    |     -------------         -----------       -----------
648    |                           | false_bb |      | true_bb |
649    |                           -----------       -----------
650    |                                   \           /
651    |                                    \         /
652    |                                     V       V
653    |                                   -------------
654    |                                   |  join_bb  |
655    |                                   -------------
656    |                                         | exit_edge (result)
657    |                                         V
658    |                                    -----------
659    |                                    | succ_bb |
660    |                                    -----------
661    |
662  */
663 
664 edge
create_empty_if_region_on_edge (edge entry_edge, tree condition)
666 {
667 
668   basic_block cond_bb, true_bb, false_bb, join_bb;
669   edge e_true, e_false, exit_edge;
670   gimple cond_stmt;
671   tree simple_cond;
672   gimple_stmt_iterator gsi;
673 
674   cond_bb = split_edge (entry_edge);
675 
676   /* Insert condition in cond_bb.  */
677   gsi = gsi_last_bb (cond_bb);
678   simple_cond =
679     force_gimple_operand_gsi (&gsi, condition, true, NULL,
680 			      false, GSI_NEW_STMT);
681   cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
682   gsi = gsi_last_bb (cond_bb);
683   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
684 
685   join_bb = split_edge (single_succ_edge (cond_bb));
686 
687   e_true = single_succ_edge (cond_bb);
688   true_bb = split_edge (e_true);
689 
690   e_false = make_edge (cond_bb, join_bb, 0);
691   false_bb = split_edge (e_false);
692 
693   e_true->flags &= ~EDGE_FALLTHRU;
694   e_true->flags |= EDGE_TRUE_VALUE;
695   e_false->flags &= ~EDGE_FALLTHRU;
696   e_false->flags |= EDGE_FALSE_VALUE;
697 
698   set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
699   set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
700   set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
701   set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
702 
703   exit_edge = single_succ_edge (join_bb);
704 
705   if (single_pred_p (exit_edge->dest))
706     set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
707 
708   return exit_edge;
709 }
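
/* An illustrative use (the trees A and B and the edge ENTRY are
   hypothetical):

     tree cond = build2 (LT_EXPR, boolean_type_node, a, b);
     edge exit = create_empty_if_region_on_edge (entry, cond);

   The empty true_bb and false_bb can then be filled in, and EXIT leads to
   the original destination of ENTRY.  */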
710 
711 /* create_empty_loop_on_edge
712    |
713    |    - pred_bb -                   ------ pred_bb ------
714    |   |           |                 | iv0 = initial_value |
715    |    -----|-----                   ---------|-----------
716    |         |                       ______    | entry_edge
717    |         | entry_edge           /      |   |
718    |         |             ====>   |      -V---V- loop_header -------------
719    |         V                     |     | iv_before = phi (iv0, iv_after) |
720    |    - succ_bb -                |      ---|-----------------------------
721    |   |           |               |         |
722    |    -----------                |      ---V--- loop_body ---------------
723    |                               |     | iv_after = iv_before + stride   |
724    |                               |     | if (iv_before < upper_bound)    |
725    |                               |      ---|--------------\--------------
726    |                               |         |               \ exit_e
727    |                               |         V                \
728    |                               |       - loop_latch -      V- succ_bb -
729    |                               |      |              |     |           |
730    |                               |       /-------------       -----------
731    |                                \ ___ /
732 
   Creates an empty loop as shown above.  IV_BEFORE is the SSA_NAME
   that is used before the increment of IV.  IV_BEFORE should be used for
735    adding code to the body that uses the IV.  OUTER is the outer loop in
736    which the new loop should be inserted.
737 
738    Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
739    inserted on the loop entry edge.  This implies that this function
740    should be used only when the UPPER_BOUND expression is a loop
741    invariant.  */
742 
743 struct loop *
create_empty_loop_on_edge (edge entry_edge,
			   tree initial_value,
			   tree stride, tree upper_bound,
			   tree iv,
			   tree *iv_before,
			   tree *iv_after,
			   struct loop *outer)
751 {
752   basic_block loop_header, loop_latch, succ_bb, pred_bb;
753   struct loop *loop;
754   gimple_stmt_iterator gsi;
755   gimple_seq stmts;
756   gimple cond_expr;
757   tree exit_test;
758   edge exit_e;
759   int prob;
760 
761   gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
762 
763   /* Create header, latch and wire up the loop.  */
764   pred_bb = entry_edge->src;
765   loop_header = split_edge (entry_edge);
766   loop_latch = split_edge (single_succ_edge (loop_header));
767   succ_bb = single_succ (loop_latch);
768   make_edge (loop_header, succ_bb, 0);
769   redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
770 
771   /* Set immediate dominator information.  */
772   set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
773   set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
774   set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
775 
776   /* Initialize a loop structure and put it in a loop hierarchy.  */
777   loop = alloc_loop ();
778   loop->header = loop_header;
779   loop->latch = loop_latch;
780   add_loop (loop, outer);
781 
782   /* TODO: Fix frequencies and counts.  */
783   prob = REG_BR_PROB_BASE / 2;
784 
785   scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
786 
787   /* Update dominators.  */
788   update_dominators_in_loop (loop);
789 
790   /* Modify edge flags.  */
791   exit_e = single_exit (loop);
792   exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
793   single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
794 
795   /* Construct IV code in loop.  */
796   initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
797   if (stmts)
798     {
799       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
800       gsi_commit_edge_inserts ();
801     }
802 
803   upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
804   if (stmts)
805     {
806       gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
807       gsi_commit_edge_inserts ();
808     }
809 
810   gsi = gsi_last_bb (loop_header);
811   create_iv (initial_value, stride, iv, loop, &gsi, false,
812 	     iv_before, iv_after);
813 
814   /* Insert loop exit condition.  */
815   cond_expr = gimple_build_cond
816     (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
817 
818   exit_test = gimple_cond_lhs (cond_expr);
819   exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
820 					false, GSI_NEW_STMT);
821   gimple_cond_set_lhs (cond_expr, exit_test);
822   gsi = gsi_last_bb (exit_e->src);
823   gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
824 
825   split_block_after_labels (loop_header);
826 
827   return loop;
828 }
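
/* An illustrative use by a hypothetical caller (ENTRY, UB, IVVAR and OUTER
   are placeholders):

     tree iv_before, iv_after;
     struct loop *nloop
       = create_empty_loop_on_edge (entry, integer_zero_node,
				    integer_one_node, ub, ivvar,
				    &iv_before, &iv_after, outer);

   The body statements that use the induction variable should then be built
   in terms of IV_BEFORE, as explained above.  */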
829 
830 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
831    latch to header and update loop tree and dominators
832    accordingly. Everything between them plus LATCH_EDGE destination must
833    be dominated by HEADER_EDGE destination, and back-reachable from
834    LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
835    FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
836    TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
837    Returns the newly created loop.  Frequencies and counts in the new loop
838    are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */
839 
840 struct loop *
loopify (edge latch_edge, edge header_edge,
	 basic_block switch_bb, edge true_edge, edge false_edge,
	 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
844 {
845   basic_block succ_bb = latch_edge->dest;
846   basic_block pred_bb = header_edge->src;
847   struct loop *loop = alloc_loop ();
848   struct loop *outer = loop_outer (succ_bb->loop_father);
849   int freq;
850   gcov_type cnt;
851   edge e;
852   edge_iterator ei;
853 
854   loop->header = header_edge->dest;
855   loop->latch = latch_edge->src;
856 
857   freq = EDGE_FREQUENCY (header_edge);
858   cnt = header_edge->count;
859 
860   /* Redirect edges.  */
861   loop_redirect_edge (latch_edge, loop->header);
862   loop_redirect_edge (true_edge, succ_bb);
863 
  /* During loop versioning, one of the switch_bb edges is already properly
     set.  Do not redirect it again unless redirect_all_edges is true.  */
866   if (redirect_all_edges)
867     {
868       loop_redirect_edge (header_edge, switch_bb);
869       loop_redirect_edge (false_edge, loop->header);
870 
871       /* Update dominators.  */
872       set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
873       set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
874     }
875 
876   set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
877 
878   /* Compute new loop.  */
879   add_loop (loop, outer);
880 
881   /* Add switch_bb to appropriate loop.  */
882   if (switch_bb->loop_father)
883     remove_bb_from_loops (switch_bb);
884   add_bb_to_loop (switch_bb, outer);
885 
886   /* Fix frequencies.  */
887   if (redirect_all_edges)
888     {
889       switch_bb->frequency = freq;
890       switch_bb->count = cnt;
891       FOR_EACH_EDGE (e, ei, switch_bb->succs)
892 	{
893 	  e->count = RDIV (switch_bb->count * e->probability, REG_BR_PROB_BASE);
894 	}
895     }
896   scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
897   scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
898   update_dominators_in_loop (loop);
899 
900   return loop;
901 }
902 
903 /* Remove the latch edge of a LOOP and update loops to indicate that
   the LOOP was removed.  After this function, the original loop latch will
   have no successor, which the caller is expected to fix somehow.
906 
907    If this may cause the information about irreducible regions to become
908    invalid, IRRED_INVALIDATED is set to true.
909 
910    LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
   basic blocks that had a non-trivial update on their loop_father.  */
912 
913 void
unloop (struct loop *loop, bool *irred_invalidated,
	bitmap loop_closed_ssa_invalidated)
916 {
917   basic_block *body;
918   struct loop *ploop;
919   unsigned i, n;
920   basic_block latch = loop->latch;
921   bool dummy = false;
922 
923   if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
924     *irred_invalidated = true;
925 
  /* This is relatively straightforward.  The dominators are unchanged, as
     the loop header dominates the loop latch, so the only thing we have to
     care about is the placement of loops and basic blocks inside the loop
     tree.  We move them all to the superloop of LOOP, and then let
     fix_bb_placements do its work.  */
931 
932   body = get_loop_body (loop);
933   n = loop->num_nodes;
934   for (i = 0; i < n; i++)
935     if (body[i]->loop_father == loop)
936       {
937 	remove_bb_from_loops (body[i]);
938 	add_bb_to_loop (body[i], loop_outer (loop));
939       }
  free (body);
941 
942   while (loop->inner)
943     {
944       ploop = loop->inner;
945       flow_loop_tree_node_remove (ploop);
946       flow_loop_tree_node_add (loop_outer (loop), ploop);
947     }
948 
949   /* Remove the loop and free its data.  */
950   delete_loop (loop);
951 
952   remove_edge (single_succ_edge (latch));
953 
  /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
     there is an irreducible region inside the cancelled loop, the flags will
     still be correct.  */
957   fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
958 }
959 
/* Fix the placement of superloops of LOOP inside the loop tree, i.e. ensure
   that the condition stated in the description of fix_loop_placement holds
   for them.  It is used in the case when we have removed some edges coming
   out of LOOP, which may cause the right placement of LOOP inside the loop
   tree to change.
964 
965    IRRED_INVALIDATED is set to true if a change in the loop structures might
966    invalidate the information about irreducible regions.  */
967 
968 static void
fix_loop_placements (struct loop *loop, bool *irred_invalidated)
970 {
971   struct loop *outer;
972 
973   while (loop_outer (loop))
974     {
975       outer = loop_outer (loop);
976       if (!fix_loop_placement (loop, irred_invalidated))
977 	break;
978 
979       /* Changing the placement of a loop in the loop tree may alter the
980 	 validity of condition 2) of the description of fix_bb_placement
981 	 for its preheader, because the successor is the header and belongs
982 	 to the loop.  So call fix_bb_placements to fix up the placement
983 	 of the preheader and (possibly) of its predecessors.  */
984       fix_bb_placements (loop_preheader_edge (loop)->src,
985 			 irred_invalidated, NULL);
986       loop = outer;
987     }
988 }
989 
990 /* Duplicate loop bounds and other information we store about
991    the loop into its duplicate.  */
992 
993 void
copy_loop_info (struct loop *loop, struct loop *target)
995 {
996   gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
997   target->any_upper_bound = loop->any_upper_bound;
998   target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
999   target->any_estimate = loop->any_estimate;
1000   target->nb_iterations_estimate = loop->nb_iterations_estimate;
1001   target->estimate_state = loop->estimate_state;
1002 }
1003 
/* Creates a copy of LOOP as a subloop of the TARGET loop, placing the newly
   created loop into the loop structure.  */
1006 struct loop *
duplicate_loop (struct loop *loop, struct loop *target)
1008 {
1009   struct loop *cloop;
1010   cloop = alloc_loop ();
1011   place_new_loop (cloop);
1012 
1013   copy_loop_info (loop, cloop);
1014 
1015   /* Mark the new loop as copy of LOOP.  */
1016   set_loop_copy (loop, cloop);
1017 
1018   /* Add it to target.  */
1019   flow_loop_tree_node_add (target, cloop);
1020 
1021   return cloop;
1022 }
1023 
1024 /* Copies structure of subloops of LOOP into TARGET loop, placing
1025    newly created loops into loop tree.  */
1026 void
duplicate_subloops (struct loop *loop, struct loop *target)
1028 {
1029   struct loop *aloop, *cloop;
1030 
1031   for (aloop = loop->inner; aloop; aloop = aloop->next)
1032     {
1033       cloop = duplicate_loop (aloop, target);
1034       duplicate_subloops (aloop, cloop);
1035     }
1036 }
1037 
1038 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1039    into TARGET loop, placing newly created loops into loop tree.  */
1040 static void
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1042 {
1043   struct loop *aloop;
1044   int i;
1045 
1046   for (i = 0; i < n; i++)
1047     {
1048       aloop = duplicate_loop (copied_loops[i], target);
1049       duplicate_subloops (copied_loops[i], aloop);
1050     }
1051 }
1052 
1053 /* Redirects edge E to basic block DEST.  */
1054 static void
loop_redirect_edge (edge e, basic_block dest)
1056 {
1057   if (e->dest == dest)
1058     return;
1059 
1060   redirect_edge_and_branch_force (e, dest);
1061 }
1062 
1063 /* Check whether LOOP's body can be duplicated.  */
1064 bool
can_duplicate_loop_p (const struct loop *loop)
1066 {
1067   int ret;
1068   basic_block *bbs = get_loop_body (loop);
1069 
1070   ret = can_copy_bbs_p (bbs, loop->num_nodes);
1071   free (bbs);
1072 
1073   return ret;
1074 }
1075 
/* Sets the probability and count of edge E to zero.  The probability and
   count are redistributed evenly to the remaining edges coming from E->src.  */
1078 
1079 static void
set_zero_probability (edge e)
1081 {
1082   basic_block bb = e->src;
1083   edge_iterator ei;
1084   edge ae, last = NULL;
1085   unsigned n = EDGE_COUNT (bb->succs);
1086   gcov_type cnt = e->count, cnt1;
1087   unsigned prob = e->probability, prob1;
1088 
1089   gcc_assert (n > 1);
1090   cnt1 = cnt / (n - 1);
1091   prob1 = prob / (n - 1);
1092 
1093   FOR_EACH_EDGE (ae, ei, bb->succs)
1094     {
1095       if (ae == e)
1096 	continue;
1097 
1098       ae->probability += prob1;
1099       ae->count += cnt1;
1100       last = ae;
1101     }
1102 
1103   /* Move the rest to one of the edges.  */
1104   last->probability += prob % (n - 1);
1105   last->count += cnt % (n - 1);
1106 
1107   e->probability = 0;
1108   e->count = 0;
1109 }
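
/* For instance, with three successors and E carrying probability 3000 and
   count 10, each of the other two edges gains 1500 and 5, any division
   remainder (here none) goes to the last edge visited, and E ends up with
   zero probability and count.  */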
1110 
1111 /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
1112    loop structure and dominators.  E's destination must be LOOP header for
1113    this to work, i.e. it must be entry or latch edge of this loop; these are
1114    unique, as the loops must have preheaders for this function to work
   correctly (in case E is the latch edge, the function unrolls the loop; if
   E is the entry edge, it peels the loop).  Store edges created by copying
   the ORIG edge from
1117    copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
1118    original LOOP body, the other copies are numbered in order given by control
1119    flow through them) into TO_REMOVE array.  Returns false if duplication is
1120    impossible.  */
1121 
1122 bool
duplicate_loop_to_header_edge (struct loop *loop, edge e,
			       unsigned int ndupl, sbitmap wont_exit,
			       edge orig, vec<edge> *to_remove,
			       int flags)
1127 {
1128   struct loop *target, *aloop;
1129   struct loop **orig_loops;
1130   unsigned n_orig_loops;
1131   basic_block header = loop->header, latch = loop->latch;
1132   basic_block *new_bbs, *bbs, *first_active;
1133   basic_block new_bb, bb, first_active_latch = NULL;
1134   edge ae, latch_edge;
1135   edge spec_edges[2], new_spec_edges[2];
1136 #define SE_LATCH 0
1137 #define SE_ORIG 1
1138   unsigned i, j, n;
1139   int is_latch = (latch == e->src);
1140   int scale_act = 0, *scale_step = NULL, scale_main = 0;
1141   int scale_after_exit = 0;
1142   int p, freq_in, freq_le, freq_out_orig;
1143   int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1144   int add_irreducible_flag;
1145   basic_block place_after;
1146   bitmap bbs_to_scale = NULL;
1147   bitmap_iterator bi;
1148 
1149   gcc_assert (e->dest == loop->header);
1150   gcc_assert (ndupl > 0);
1151 
1152   if (orig)
1153     {
1154       /* Orig must be edge out of the loop.  */
1155       gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1156       gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1157     }
1158 
1159   n = loop->num_nodes;
1160   bbs = get_loop_body_in_dom_order (loop);
1161   gcc_assert (bbs[0] == loop->header);
1162   gcc_assert (bbs[n  - 1] == loop->latch);
1163 
1164   /* Check whether duplication is possible.  */
1165   if (!can_copy_bbs_p (bbs, loop->num_nodes))
1166     {
1167       free (bbs);
1168       return false;
1169     }
1170   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1171 
  /* In case we are doing loop peeling and the loop is in the middle of an
     irreducible region, the peeled copies will be inside it too.  */
1174   add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1175   gcc_assert (!is_latch || !add_irreducible_flag);
1176 
1177   /* Find edge from latch.  */
1178   latch_edge = loop_latch_edge (loop);
1179 
1180   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1181     {
      /* Calculate the coefficients by which we have to scale the frequencies
	 of the duplicated loop bodies.  */
1184       freq_in = header->frequency;
1185       freq_le = EDGE_FREQUENCY (latch_edge);
1186       if (freq_in == 0)
1187 	freq_in = 1;
1188       if (freq_in < freq_le)
1189 	freq_in = freq_le;
1190       freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1191       if (freq_out_orig > freq_in - freq_le)
1192 	freq_out_orig = freq_in - freq_le;
1193       prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1194       prob_pass_wont_exit =
1195 	      RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1196 
1197       if (orig
1198 	  && REG_BR_PROB_BASE - orig->probability != 0)
1199 	{
1200 	  /* The blocks that are dominated by a removed exit edge ORIG have
1201 	     frequencies scaled by this.  */
1202 	  scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
1203 				   REG_BR_PROB_BASE - orig->probability);
1204 	  bbs_to_scale = BITMAP_ALLOC (NULL);
1205 	  for (i = 0; i < n; i++)
1206 	    {
1207 	      if (bbs[i] != orig->src
1208 		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1209 		bitmap_set_bit (bbs_to_scale, i);
1210 	    }
1211 	}
1212 
1213       scale_step = XNEWVEC (int, ndupl);
1214 
1215       for (i = 1; i <= ndupl; i++)
1216 	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1217 				? prob_pass_wont_exit
1218 				: prob_pass_thru;
1219 
1220       /* Complete peeling is special as the probability of exit in last
1221 	 copy becomes 1.  */
1222       if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1223 	{
1224 	  int wanted_freq = EDGE_FREQUENCY (e);
1225 
1226 	  if (wanted_freq > freq_in)
1227 	    wanted_freq = freq_in;
1228 
1229 	  gcc_assert (!is_latch);
	  /* The first copy has the frequency of the incoming edge.  Each
	     subsequent frequency should be reduced by prob_pass_wont_exit.
	     The caller should have managed the flags so that all copies
	     except for the original loop have wont_exit set.  */
1234 	  scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1235 	  /* Now simulate the duplication adjustments and compute header
1236 	     frequency of the last copy.  */
1237 	  for (i = 0; i < ndupl; i++)
1238 	    wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
1239 	  scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1240 	}
1241       else if (is_latch)
1242 	{
1243 	  prob_pass_main = bitmap_bit_p (wont_exit, 0)
1244 				? prob_pass_wont_exit
1245 				: prob_pass_thru;
1246 	  p = prob_pass_main;
1247 	  scale_main = REG_BR_PROB_BASE;
1248 	  for (i = 0; i < ndupl; i++)
1249 	    {
1250 	      scale_main += p;
1251 	      p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
1252 	    }
1253 	  scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
1254 	  scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
1255 	}
1256       else
1257 	{
1258 	  scale_main = REG_BR_PROB_BASE;
1259 	  for (i = 0; i < ndupl; i++)
1260 	    scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
1261 	  scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1262 	}
1263       for (i = 0; i < ndupl; i++)
1264 	gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1265       gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1266 		  && scale_act >= 0  && scale_act <= REG_BR_PROB_BASE);
1267     }
1268 
1269   /* Loop the new bbs will belong to.  */
1270   target = e->src->loop_father;
1271 
1272   /* Original loops.  */
1273   n_orig_loops = 0;
1274   for (aloop = loop->inner; aloop; aloop = aloop->next)
1275     n_orig_loops++;
1276   orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1277   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1278     orig_loops[i] = aloop;
1279 
1280   set_loop_copy (loop, target);
1281 
1282   first_active = XNEWVEC (basic_block, n);
1283   if (is_latch)
1284     {
1285       memcpy (first_active, bbs, n * sizeof (basic_block));
1286       first_active_latch = latch;
1287     }
1288 
1289   spec_edges[SE_ORIG] = orig;
1290   spec_edges[SE_LATCH] = latch_edge;
1291 
1292   place_after = e->src;
1293   for (j = 0; j < ndupl; j++)
1294     {
1295       /* Copy loops.  */
1296       copy_loops_to (orig_loops, n_orig_loops, target);
1297 
1298       /* Copy bbs.  */
1299       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1300 		place_after);
1301       place_after = new_spec_edges[SE_LATCH]->src;
1302 
1303       if (flags & DLTHE_RECORD_COPY_NUMBER)
1304 	for (i = 0; i < n; i++)
1305 	  {
1306 	    gcc_assert (!new_bbs[i]->aux);
1307 	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
1308 	  }
1309 
1310       /* Note whether the blocks and edges belong to an irreducible loop.  */
1311       if (add_irreducible_flag)
1312 	{
1313 	  for (i = 0; i < n; i++)
1314 	    new_bbs[i]->flags |= BB_DUPLICATED;
1315 	  for (i = 0; i < n; i++)
1316 	    {
1317 	      edge_iterator ei;
1318 	      new_bb = new_bbs[i];
1319 	      if (new_bb->loop_father == target)
1320 		new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1321 
1322 	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
1323 		if ((ae->dest->flags & BB_DUPLICATED)
1324 		    && (ae->src->loop_father == target
1325 			|| ae->dest->loop_father == target))
1326 		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1327 	    }
1328 	  for (i = 0; i < n; i++)
1329 	    new_bbs[i]->flags &= ~BB_DUPLICATED;
1330 	}
1331 
1332       /* Redirect the special edges.  */
1333       if (is_latch)
1334 	{
1335 	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1336 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1337 					  loop->header);
1338 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1339 	  latch = loop->latch = new_bbs[n - 1];
1340 	  e = latch_edge = new_spec_edges[SE_LATCH];
1341 	}
1342       else
1343 	{
1344 	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1345 					  loop->header);
1346 	  redirect_edge_and_branch_force (e, new_bbs[0]);
1347 	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1348 	  e = new_spec_edges[SE_LATCH];
1349 	}
1350 
1351       /* Record exit edge in this copy.  */
1352       if (orig && bitmap_bit_p (wont_exit, j + 1))
1353 	{
1354 	  if (to_remove)
1355 	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
1356 	  set_zero_probability (new_spec_edges[SE_ORIG]);
1357 
1358 	  /* Scale the frequencies of the blocks dominated by the exit.  */
1359 	  if (bbs_to_scale)
1360 	    {
1361 	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1362 		{
1363 		  scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1364 					     REG_BR_PROB_BASE);
1365 		}
1366 	    }
1367 	}
1368 
1369       /* Record the first copy in the control flow order if it is not
1370 	 the original loop (i.e. in case of peeling).  */
1371       if (!first_active_latch)
1372 	{
1373 	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
1374 	  first_active_latch = new_bbs[n - 1];
1375 	}
1376 
1377       /* Set counts and frequencies.  */
1378       if (flags & DLTHE_FLAG_UPDATE_FREQ)
1379 	{
1380 	  scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1381 	  scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1382 	}
1383     }
1384   free (new_bbs);
1385   free (orig_loops);
1386 
1387   /* Record the exit edge in the original loop body, and update the frequencies.  */
1388   if (orig && bitmap_bit_p (wont_exit, 0))
1389     {
1390       if (to_remove)
1391 	to_remove->safe_push (orig);
1392       set_zero_probability (orig);
1393 
1394       /* Scale the frequencies of the blocks dominated by the exit.  */
1395       if (bbs_to_scale)
1396 	{
1397 	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1398 	    {
1399 	      scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1400 					 REG_BR_PROB_BASE);
1401 	    }
1402 	}
1403     }
1404 
1405   /* Update the original loop.  */
1406   if (!is_latch)
1407     set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1408   if (flags & DLTHE_FLAG_UPDATE_FREQ)
1409     {
1410       scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1411       free (scale_step);
1412     }
1413 
1414   /* Update dominators of outer blocks if affected.  */
1415   for (i = 0; i < n; i++)
1416     {
1417       basic_block dominated, dom_bb;
1418       vec<basic_block> dom_bbs;
1419       unsigned j;
1420 
1421       bb = bbs[i];
1422       bb->aux = 0;
1423 
1424       dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1425       FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1426 	{
1427 	  if (flow_bb_inside_loop_p (loop, dominated))
1428 	    continue;
1429 	  dom_bb = nearest_common_dominator (
1430 			CDI_DOMINATORS, first_active[i], first_active_latch);
1431 	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1432 	}
1433       dom_bbs.release ();
1434     }
1435   free (first_active);
1436 
1437   free (bbs);
1438   BITMAP_FREE (bbs_to_scale);
1439 
1440   return true;
1441 }
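
/* A sketch of unrolling-style use (N_UNROLL, DESC and TO_REMOVE are
   hypothetical; see the callers in loop-unroll.c for the real logic that
   decides which bits of WONT_EXIT to set):

     sbitmap wont_exit = sbitmap_alloc (n_unroll + 1);
     bitmap_ones (wont_exit);
     ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
					 n_unroll, wont_exit, desc->out_edge,
					 &to_remove, DLTHE_FLAG_UPDATE_FREQ);

   Passing the latch edge makes this an unrolling; passing the entry edge
   instead would peel N_UNROLL copies in front of the loop.  */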
1442 
/* A callback for make_forwarder_block, to redirect all edges except for
   MFB_KJ_EDGE to the entry part.  E is the edge for which we should decide
   whether to redirect it.  */
1446 
1447 edge mfb_kj_edge;
1448 bool
mfb_keep_just (edge e)
1450 {
1451   return e != mfb_kj_edge;
1452 }
1453 
1454 /* True when a candidate preheader BLOCK has predecessors from LOOP.  */
1455 
1456 static bool
has_preds_from_loop (basic_block block, struct loop *loop)
1458 {
1459   edge e;
1460   edge_iterator ei;
1461 
1462   FOR_EACH_EDGE (e, ei, block->preds)
1463     if (e->src->loop_father == loop)
1464       return true;
1465   return false;
1466 }
1467 
1468 /* Creates a pre-header for a LOOP.  Returns newly created block.  Unless
1469    CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1470    entry; otherwise we also force preheader block to have only one successor.
1471    When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1472    to be a fallthru predecessor to the loop header and to have only
1473    predecessors from outside of the loop.
1474    The function also updates dominators.  */
1475 
1476 basic_block
create_preheader (struct loop *loop, int flags)
1478 {
1479   edge e, fallthru;
1480   basic_block dummy;
1481   int nentry = 0;
1482   bool irred = false;
1483   bool latch_edge_was_fallthru;
1484   edge one_succ_pred = NULL, single_entry = NULL;
1485   edge_iterator ei;
1486 
1487   FOR_EACH_EDGE (e, ei, loop->header->preds)
1488     {
1489       if (e->src == loop->latch)
1490 	continue;
1491       irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1492       nentry++;
1493       single_entry = e;
1494       if (single_succ_p (e->src))
1495 	one_succ_pred = e;
1496     }
1497   gcc_assert (nentry);
1498   if (nentry == 1)
1499     {
1500       bool need_forwarder_block = false;
1501 
1502       /* We do not allow entry block to be the loop preheader, since we
1503 	     cannot emit code there.  */
1504       if (single_entry->src == ENTRY_BLOCK_PTR)
1505         need_forwarder_block = true;
1506       else
1507         {
1508           /* If we want simple preheaders, also force the preheader to have
1509              just a single successor.  */
1510           if ((flags & CP_SIMPLE_PREHEADERS)
1511               && !single_succ_p (single_entry->src))
1512             need_forwarder_block = true;
1513           /* If we want fallthru preheaders, also create forwarder block when
1514              preheader ends with a jump or has predecessors from loop.  */
1515           else if ((flags & CP_FALLTHRU_PREHEADERS)
1516                    && (JUMP_P (BB_END (single_entry->src))
1517                        || has_preds_from_loop (single_entry->src, loop)))
1518             need_forwarder_block = true;
1519         }
1520       if (! need_forwarder_block)
1521 	return NULL;
1522     }
1523 
  mfb_kj_edge = loop_latch_edge (loop);
  latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
  fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
  dummy = fallthru->src;
  loop->header = fallthru->dest;

  /* Try to be clever in placing the newly created preheader.  The idea is to
     avoid breaking any "fallthruness" relationship between blocks.

     The preheader was created just before the header and all incoming edges
     to the header were redirected to the preheader, except the latch edge.
     So the only problematic case is when this latch edge was a fallthru
     edge: it no longer is one after the preheader creation, so we have
     broken the fallthruness.  We're therefore going to look for a better
     place.  */
  if (latch_edge_was_fallthru)
    {
      if (one_succ_pred)
	e = one_succ_pred;
      else
	e = EDGE_PRED (dummy, 0);

      move_block_after (dummy, e->src);
    }

  if (irred)
    {
      dummy->flags |= BB_IRREDUCIBLE_LOOP;
      single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }

  if (dump_file)
    fprintf (dump_file, "Created preheader block for loop %i\n",
	     loop->num);

  if (flags & CP_FALLTHRU_PREHEADERS)
    gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
                && !JUMP_P (BB_END (dummy)));

  return dummy;
}
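
/* A minimal usage sketch (hypothetical caller; it assumes the loop tree and
   dominators are up to date):

     basic_block ph = create_preheader (loop, CP_SIMPLE_PREHEADERS);

   A NULL return means the loop already had a suitable preheader.  When
   CP_FALLTHRU_PREHEADERS is also passed, any block the function does create
   is guaranteed to fall through into the loop header.  */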

/* Create preheaders for each loop; for meaning of FLAGS see create_preheader.  */

void
create_preheaders (int flags)
{
  loop_iterator li;
  struct loop *loop;

  if (!current_loops)
    return;

  FOR_EACH_LOOP (li, loop, 0)
    create_preheader (loop, flags);
  loops_state_set (LOOPS_HAVE_PREHEADERS);
}
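
/* A typical request, made once after the loop tree has been built (a hedged
   sketch; combine the CP_* flags as described above create_preheader):

     create_preheaders (CP_SIMPLE_PREHEADERS);

   Afterwards every loop has a single-entry preheader and the
   LOOPS_HAVE_PREHEADERS state flag is set.  */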

/* Forces all loop latches to have only a single successor.  */

void
force_single_succ_latches (void)
{
  loop_iterator li;
  struct loop *loop;
  edge e;

  FOR_EACH_LOOP (li, loop, 0)
    {
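      /* Nothing to do if the latch is distinct from the header and already
	 has a single successor.  */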
      if (loop->latch != loop->header && single_succ_p (loop->latch))
	continue;

      e = find_edge (loop->latch, loop->header);
      gcc_checking_assert (e != NULL);

      split_edge (e);
    }
  loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
}
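
/* A sketch of the effect on one loop whose latch ends in a conditional
   jump (a hedged illustration, not taken from a dump):

       [latch] ----> [header]             [latch] ----> [new latch] ----> [header]
          |                       ==>        |
          +--------> [exit]                  +--------> [exit]

   split_edge places the new block on the back edge and makes it the loop's
   latch, so the latch now has exactly one successor.  */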

/* This function is called from loop_version.  It splits the entry edge
   of the loop we want to version, adds the versioning condition, and
   adjusts the edges to the two versions of the loop appropriately.
   E is an incoming edge.  Returns the basic block containing the
   condition.

   --- edge e ---- > [second_head]

   Split it, insert a new conditional expression, and adjust the edges.

    --- edge e ---> [cond expr] ---> [first_head]
			|
			+---------> [second_head]

   THEN_PROB is the probability of the then branch of the condition.  */

static basic_block
lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
			   edge e, void *cond_expr, unsigned then_prob)
{
  basic_block new_head = NULL;
  edge e1;

  gcc_assert (e->dest == second_head);

  /* Split edge 'e'.  This will create a new basic block, where we can
     insert the conditional expression.  */
  new_head = split_edge (e);

  lv_add_condition_to_bb (first_head, second_head, new_head,
			  cond_expr);

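  /* NEW_HEAD has a single successor edge to SECOND_HEAD, created by
     split_edge above.  Add the edge to FIRST_HEAD and distribute the
     incoming probability and count between the two successors.  */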
  /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
  e = single_succ_edge (new_head);
  e1 = make_edge (new_head, first_head,
		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
  e1->probability = then_prob;
  e->probability = REG_BR_PROB_BASE - then_prob;
  e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
  e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);

  set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
  set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);

  /* Adjust loop header phi nodes.  */
  lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);

  return new_head;
}

/* Main entry point for the Loop Versioning transformation.

   This transformation, given a condition and a loop, creates
   -if (condition) { loop_copy1 } else { loop_copy2 },
   where loop_copy1 is the loop transformed in one way, and loop_copy2
   is the loop transformed in another way (or unchanged).  'condition'
   may be a run time test for things that were not resolved by static
   analysis (overlapping ranges (anti-aliasing), alignment, etc.).

   THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
   is the ratio by which the frequencies in the original loop should
   be scaled.  ELSE_SCALE is the ratio by which the frequencies in the
   new loop should be scaled.

   If PLACE_AFTER is true, we place the new loop after LOOP in the
   instruction stream, otherwise it is placed before LOOP.  */

struct loop *
loop_version (struct loop *loop,
	      void *cond_expr, basic_block *condition_bb,
	      unsigned then_prob, unsigned then_scale, unsigned else_scale,
	      bool place_after)
{
  basic_block first_head, second_head;
  edge entry, latch_edge, true_edge, false_edge;
  int irred_flag;
  struct loop *nloop;
  basic_block cond_bb;

  /* Record the entry and latch edges for the loop.  */
  entry = loop_preheader_edge (loop);
  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;

  /* Note down the head of the loop as first_head.  */
  first_head = entry->dest;

  /* Duplicate the loop.  */
  if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
					       NULL, NULL, NULL, 0))
    {
      entry->flags |= irred_flag;
      return NULL;
    }

  /* After duplication the entry edge now points to the new loop head block.
     Note down the new head as second_head.  */
  second_head = entry->dest;

  /* Split the loop entry edge and insert a new block with the cond expr.  */
  cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
				       entry, cond_expr, then_prob);
  if (condition_bb)
    *condition_bb = cond_bb;

  if (!cond_bb)
    {
      entry->flags |= irred_flag;
      return NULL;
    }

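  /* The duplicated body is not yet registered as a separate loop.  Its
     copied latch has a single successor edge; loopify below uses that edge,
     the entry edge of the copied header, and COND_BB with its two successor
     edges to set the copy up as the new loop NLOOP.  */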
  latch_edge = single_succ_edge (get_bb_copy (loop->latch));

  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
  nloop = loopify (latch_edge,
		   single_pred_edge (get_bb_copy (loop->header)),
		   cond_bb, true_edge, false_edge,
		   false /* Do not redirect all edges.  */,
		   then_scale, else_scale);

  copy_loop_info (loop, nloop);

  /* loopify redirected latch_edge.  Update its PENDING_STMTS.  */
  lv_flush_pending_stmts (latch_edge);

  /* loopify redirected condition_bb's succ edge.  Update its PENDING_STMTS.  */
  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
  lv_flush_pending_stmts (false_edge);
  /* Adjust irreducible flag.  */
  if (irred_flag)
    {
      cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
      loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
      loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
      single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }

  if (place_after)
    {
      basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
      unsigned i;

      after = loop->latch;

      for (i = 0; i < nloop->num_nodes; i++)
	{
	  move_block_after (bbs[i], after);
	  after = bbs[i];
	}
      free (bbs);
    }

  /* At this point condition_bb is loop preheader with two successors,
     first_head and second_head.  Make sure that loop preheader has only
     one successor.  */
  split_edge (loop_preheader_edge (loop));
  split_edge (loop_preheader_edge (nloop));

  return nloop;
}
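
/* A hedged sketch of a typical call; COND stands for a condition built by
   the caller in the current IR, and the even 50/50 probabilities are just
   an example:

     basic_block cond_bb;
     struct loop *nloop
       = loop_version (loop, cond, &cond_bb,
		       REG_BR_PROB_BASE / 2, REG_BR_PROB_BASE / 2,
		       REG_BR_PROB_BASE / 2, false);

   On success LOOP is the version entered when COND is true, NLOOP is the
   copy entered when COND is false, and COND_BB contains the new conditional
   jump; on failure NULL is returned.  */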