1 /* Loop splitting.
2    Copyright (C) 2015-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "tree-pass.h"
27 #include "ssa.h"
28 #include "fold-const.h"
29 #include "tree-cfg.h"
30 #include "tree-ssa.h"
31 #include "tree-ssa-loop-niter.h"
32 #include "tree-ssa-loop.h"
33 #include "tree-ssa-loop-manip.h"
34 #include "tree-into-ssa.h"
35 #include "tree-inline.h"
36 #include "tree-cfgcleanup.h"
37 #include "cfgloop.h"
38 #include "tree-scalar-evolution.h"
39 #include "gimple-iterator.h"
40 #include "gimple-pretty-print.h"
41 #include "cfghooks.h"
42 #include "gimple-fold.h"
43 #include "gimplify-me.h"
44 
45 /* This file implements two kinds of loop splitting.
46 
47    One transformation of loops like:
48 
49    for (i = 0; i < 100; i++)
50      {
51        if (i < 50)
52          A;
53        else
54          B;
55      }
56 
57    into:
58 
59    for (i = 0; i < 50; i++)
60      {
61        A;
62      }
63    for (; i < 100; i++)
64      {
65        B;
66      }
67 
68    */
69 
70 /* Return true when BB inside LOOP is a potential iteration space
71    split point, i.e. ends with a condition like "IV < comp", which
72    is true on one side of the iteration space and false on the other,
73    and the split point can be computed.  If so, also return the border
74    point in *BORDER and the comparison induction variable in IV.  */
75 
76 static tree
split_at_bb_p(class loop * loop,basic_block bb,tree * border,affine_iv * iv)77 split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
78 {
79   gimple *last;
80   gcond *stmt;
81   affine_iv iv2;
82 
83   /* BB must end in a simple conditional jump.  */
84   last = last_stmt (bb);
85   if (!last || gimple_code (last) != GIMPLE_COND)
86     return NULL_TREE;
87   stmt = as_a <gcond *> (last);
88 
89   enum tree_code code = gimple_cond_code (stmt);
90 
91   /* Only handle relational comparisons, for equality and non-equality
92      we'd have to split the loop into two loops and a middle statement.  */
93   switch (code)
94     {
95       case LT_EXPR:
96       case LE_EXPR:
97       case GT_EXPR:
98       case GE_EXPR:
99 	break;
100       default:
101 	return NULL_TREE;
102     }
103 
104   if (loop_exits_from_bb_p (loop, bb))
105     return NULL_TREE;
106 
107   tree op0 = gimple_cond_lhs (stmt);
108   tree op1 = gimple_cond_rhs (stmt);
109   class loop *useloop = loop_containing_stmt (stmt);
110 
111   if (!simple_iv (loop, useloop, op0, iv, false))
112     return NULL_TREE;
113   if (!simple_iv (loop, useloop, op1, &iv2, false))
114     return NULL_TREE;
115 
116   /* Make it so that the first argument of the condition is
117      the looping one.  */
118   if (!integer_zerop (iv2.step))
119     {
120       std::swap (op0, op1);
121       std::swap (*iv, iv2);
122       code = swap_tree_comparison (code);
123       gimple_cond_set_condition (stmt, code, op0, op1);
124       update_stmt (stmt);
125     }
126   else if (integer_zerop (iv->step))
127     return NULL_TREE;
128   if (!integer_zerop (iv2.step))
129     return NULL_TREE;
130   if (!iv->no_overflow)
131     return NULL_TREE;
132 
133   if (dump_file && (dump_flags & TDF_DETAILS))
134     {
135       fprintf (dump_file, "Found potential split point: ");
136       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
137       fprintf (dump_file, " { ");
138       print_generic_expr (dump_file, iv->base, TDF_SLIM);
139       fprintf (dump_file, " + I*");
140       print_generic_expr (dump_file, iv->step, TDF_SLIM);
141       fprintf (dump_file, " } %s ", get_tree_code_name (code));
142       print_generic_expr (dump_file, iv2.base, TDF_SLIM);
143       fprintf (dump_file, "\n");
144     }
145 
146   *border = iv2.base;
147   return op0;
148 }
149 
150 /* Given a GUARD conditional stmt inside LOOP, which we want to make always
151    true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
152    (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
153    exit test statement to loop back only if the GUARD statement will
154    also be true/false in the next iteration.  */
155 
156 static void
patch_loop_exit(class loop * loop,gcond * guard,tree nextval,tree newbound,bool initial_true)157 patch_loop_exit (class loop *loop, gcond *guard, tree nextval, tree newbound,
158 		 bool initial_true)
159 {
160   edge exit = single_exit (loop);
161   gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
162   gimple_cond_set_condition (stmt, gimple_cond_code (guard),
163 			     nextval, newbound);
164   update_stmt (stmt);
165 
166   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
167 
168   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
169   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
170 
171   if (initial_true)
172     {
173       exit->flags |= EDGE_FALSE_VALUE;
174       stay->flags |= EDGE_TRUE_VALUE;
175     }
176   else
177     {
178       exit->flags |= EDGE_TRUE_VALUE;
179       stay->flags |= EDGE_FALSE_VALUE;
180     }
181 }
182 
183 /* Give an induction variable GUARD_IV, and its affine descriptor IV,
184    find the loop phi node in LOOP defining it directly, or create
185    such phi node.  Return that phi node.  */
186 
187 static gphi *
find_or_create_guard_phi(class loop * loop,tree guard_iv,affine_iv *)188 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
189 {
190   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
191   gphi *phi;
192   if ((phi = dyn_cast <gphi *> (def))
193       && gimple_bb (phi) == loop->header)
194     return phi;
195 
196   /* XXX Create the PHI instead.  */
197   return NULL;
198 }
199 
200 /* Returns true if the exit values of all loop phi nodes can be
201    determined easily (i.e. that connect_loop_phis can determine them).  */
202 
203 static bool
easy_exit_values(class loop * loop)204 easy_exit_values (class loop *loop)
205 {
206   edge exit = single_exit (loop);
207   edge latch = loop_latch_edge (loop);
208   gphi_iterator psi;
209 
210   /* Currently we regard the exit values as easy if they are the same
211      as the value over the backedge.  Which is the case if the definition
212      of the backedge value dominates the exit edge.  */
213   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
214     {
215       gphi *phi = psi.phi ();
216       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
217       basic_block bb;
218       if (TREE_CODE (next) == SSA_NAME
219 	  && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
220 	  && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
221 	return false;
222     }
223 
224   return true;
225 }
226 
227 /* This function updates the SSA form after connect_loops made a new
228    edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
229    conditional).  I.e. the second loop can now be entered either
230    via the original entry or via NEW_E, so the entry values of LOOP2
231    phi nodes are either the original ones or those at the exit
232    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
233    this.  The loops need to fulfill easy_exit_values().  */
234 
235 static void
connect_loop_phis(class loop * loop1,class loop * loop2,edge new_e)236 connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
237 {
238   basic_block rest = loop_preheader_edge (loop2)->src;
239   gcc_assert (new_e->dest == rest);
240   edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
241 
242   edge firste = loop_preheader_edge (loop1);
243   edge seconde = loop_preheader_edge (loop2);
244   edge firstn = loop_latch_edge (loop1);
245   gphi_iterator psi_first, psi_second;
246   for (psi_first = gsi_start_phis (loop1->header),
247        psi_second = gsi_start_phis (loop2->header);
248        !gsi_end_p (psi_first);
249        gsi_next (&psi_first), gsi_next (&psi_second))
250     {
251       tree init, next, new_init;
252       use_operand_p op;
253       gphi *phi_first = psi_first.phi ();
254       gphi *phi_second = psi_second.phi ();
255 
256       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
257       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
258       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
259       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
260 
261       /* Prefer using original variable as a base for the new ssa name.
262 	 This is necessary for virtual ops, and useful in order to avoid
263 	 losing debug info for real ops.  */
264       if (TREE_CODE (next) == SSA_NAME
265 	  && useless_type_conversion_p (TREE_TYPE (next),
266 					TREE_TYPE (init)))
267 	new_init = copy_ssa_name (next);
268       else if (TREE_CODE (init) == SSA_NAME
269 	       && useless_type_conversion_p (TREE_TYPE (init),
270 					     TREE_TYPE (next)))
271 	new_init = copy_ssa_name (init);
272       else if (useless_type_conversion_p (TREE_TYPE (next),
273 					  TREE_TYPE (init)))
274 	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
275 				       "unrinittmp");
276       else
277 	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
278 				       "unrinittmp");
279 
280       gphi * newphi = create_phi_node (new_init, rest);
281       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
282       add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
283       SET_USE (op, new_init);
284     }
285 }
286 
287 /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
288    they are still equivalent and placed in two arms of a diamond, like so:
289 
290                .------if (cond)------.
291                v                     v
292              pre1                   pre2
293               |                      |
294         .--->h1                     h2<----.
295         |     |                      |     |
296         |    ex1---.            .---ex2    |
297         |    /     |            |     \    |
298         '---l1     X            |     l2---'
299                    |            |
300                    |            |
301                    '--->join<---'
302 
303    This function transforms the program such that LOOP1 is conditionally
304    falling through to LOOP2, or skipping it.  This is done by splitting
305    the ex1->join edge at X in the diagram above, and inserting a condition
306    whose one arm goes to pre2, resulting in this situation:
307 
308                .------if (cond)------.
309                v                     v
310              pre1       .---------->pre2
311               |         |            |
312         .--->h1         |           h2<----.
313         |     |         |            |     |
314         |    ex1---.    |       .---ex2    |
315         |    /     v    |       |     \    |
316         '---l1   skip---'       |     l2---'
317                    |            |
318                    |            |
319                    '--->join<---'
320 
321 
322    The condition used is the exit condition of LOOP1, which effectively means
323    that when the first loop exits (for whatever reason) but the real original
324    exit expression is still false the second loop will be entered.
325    The function returns the new edge cond->pre2.
326 
327    This doesn't update the SSA form, see connect_loop_phis for that.  */
328 
329 static edge
connect_loops(class loop * loop1,class loop * loop2)330 connect_loops (class loop *loop1, class loop *loop2)
331 {
332   edge exit = single_exit (loop1);
333   basic_block skip_bb = split_edge (exit);
334   gcond *skip_stmt;
335   gimple_stmt_iterator gsi;
336   edge new_e, skip_e;
337 
338   gimple *stmt = last_stmt (exit->src);
339   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
340 				 gimple_cond_lhs (stmt),
341 				 gimple_cond_rhs (stmt),
342 				 NULL_TREE, NULL_TREE);
343   gsi = gsi_last_bb (skip_bb);
344   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
345 
346   skip_e = EDGE_SUCC (skip_bb, 0);
347   skip_e->flags &= ~EDGE_FALLTHRU;
348   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
349   if (exit->flags & EDGE_TRUE_VALUE)
350     {
351       skip_e->flags |= EDGE_TRUE_VALUE;
352       new_e->flags |= EDGE_FALSE_VALUE;
353     }
354   else
355     {
356       skip_e->flags |= EDGE_FALSE_VALUE;
357       new_e->flags |= EDGE_TRUE_VALUE;
358     }
359 
360   new_e->probability = profile_probability::likely ();
361   skip_e->probability = new_e->probability.invert ();
362 
363   return new_e;
364 }
365 
366 /* This returns the new bound for iterations given the original iteration
367    space in NITER, an arbitrary new bound BORDER, assumed to be some
368    comparison value with a different IV, the initial value GUARD_INIT of
369    that other IV, and the comparison code GUARD_CODE that compares
370    that other IV with BORDER.  We return an SSA name, and place any
371    necessary statements for that computation into *STMTS.
372 
373    For example for such a loop:
374 
375      for (i = beg, j = guard_init; i < end; i++, j++)
376        if (j < border)  // this is supposed to be true/false
377          ...
378 
379    we want to return a new bound (on j) that makes the loop iterate
380    as long as the condition j < border stays true.  We also don't want
381    to iterate more often than the original loop, so we have to introduce
382    some cut-off as well (via min/max), effectively resulting in:
383 
384      newend = min (end+guard_init-beg, border)
385      for (i = beg; j = guard_init; j < newend; i++, j++)
386        if (j < c)
387          ...
388 
389    Depending on the direction of the IVs and if the exit tests
390    are strict or non-strict we need to use MIN or MAX,
391    and add or subtract 1.  This routine computes newend above.  */
392 
393 static tree
compute_new_first_bound(gimple_seq * stmts,class tree_niter_desc * niter,tree border,enum tree_code guard_code,tree guard_init)394 compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
395 			 tree border,
396 			 enum tree_code guard_code, tree guard_init)
397 {
398   /* The niter structure contains the after-increment IV, we need
399      the loop-enter base, so subtract STEP once.  */
400   tree controlbase = force_gimple_operand (niter->control.base,
401 					   stmts, true, NULL_TREE);
402   tree controlstep = niter->control.step;
403   tree enddiff;
404   if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
405     {
406       controlstep = gimple_build (stmts, NEGATE_EXPR,
407 				  TREE_TYPE (controlstep), controlstep);
408       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
409 			      TREE_TYPE (controlbase),
410 			      controlbase, controlstep);
411     }
412   else
413     enddiff = gimple_build (stmts, MINUS_EXPR,
414 			    TREE_TYPE (controlbase),
415 			    controlbase, controlstep);
416 
417   /* Compute end-beg.  */
418   gimple_seq stmts2;
419   tree end = force_gimple_operand (niter->bound, &stmts2,
420 					true, NULL_TREE);
421   gimple_seq_add_seq_without_update (stmts, stmts2);
422   if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
423     {
424       tree tem = gimple_convert (stmts, sizetype, enddiff);
425       tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
426       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
427 			      TREE_TYPE (enddiff),
428 			      end, tem);
429     }
430   else
431     enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
432 			    end, enddiff);
433 
434   /* Compute guard_init + (end-beg).  */
435   tree newbound;
436   enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
437   if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
438     {
439       enddiff = gimple_convert (stmts, sizetype, enddiff);
440       newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
441 			       TREE_TYPE (guard_init),
442 			       guard_init, enddiff);
443     }
444   else
445     newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
446 			     guard_init, enddiff);
447 
448   /* Depending on the direction of the IVs the new bound for the first
449      loop is the minimum or maximum of old bound and border.
450      Also, if the guard condition isn't strictly less or greater,
451      we need to adjust the bound.  */
452   int addbound = 0;
453   enum tree_code minmax;
454   if (niter->cmp == LT_EXPR)
455     {
456       /* GT and LE are the same, inverted.  */
457       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
458 	addbound = -1;
459       minmax = MIN_EXPR;
460     }
461   else
462     {
463       gcc_assert (niter->cmp == GT_EXPR);
464       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
465 	addbound = 1;
466       minmax = MAX_EXPR;
467     }
468 
469   if (addbound)
470     {
471       tree type2 = TREE_TYPE (newbound);
472       if (POINTER_TYPE_P (type2))
473 	type2 = sizetype;
474       newbound = gimple_build (stmts,
475 			       POINTER_TYPE_P (TREE_TYPE (newbound))
476 			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
477 			       TREE_TYPE (newbound),
478 			       newbound,
479 			       build_int_cst (type2, addbound));
480     }
481 
482   tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
483 			      border, newbound);
484   return newend;
485 }
486 
487 /* Checks if LOOP contains an conditional block whose condition
488    depends on which side in the iteration space it is, and if so
489    splits the iteration space into two loops.  Returns true if the
490    loop was split.  NITER must contain the iteration descriptor for the
491    single exit of LOOP.  */
492 
493 static bool
split_loop(class loop * loop1)494 split_loop (class loop *loop1)
495 {
496   class tree_niter_desc niter;
497   basic_block *bbs;
498   unsigned i;
499   bool changed = false;
500   tree guard_iv;
501   tree border = NULL_TREE;
502   affine_iv iv;
503 
504   if (!single_exit (loop1)
505       /* ??? We could handle non-empty latches when we split the latch edge
506 	 (not the exit edge), and put the new exit condition in the new block.
507 	 OTOH this executes some code unconditionally that might have been
508 	 skipped by the original exit before.  */
509       || !empty_block_p (loop1->latch)
510       || !easy_exit_values (loop1)
511       || !number_of_iterations_exit (loop1, single_exit (loop1), &niter,
512 				     false, true)
513       || niter.cmp == ERROR_MARK
514       /* We can't yet handle loops controlled by a != predicate.  */
515       || niter.cmp == NE_EXPR)
516     return false;
517 
518   bbs = get_loop_body (loop1);
519 
520   if (!can_copy_bbs_p (bbs, loop1->num_nodes))
521     {
522       free (bbs);
523       return false;
524     }
525 
526   /* Find a splitting opportunity.  */
527   for (i = 0; i < loop1->num_nodes; i++)
528     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
529       {
530 	/* Handling opposite steps is not implemented yet.  Neither
531 	   is handling different step sizes.  */
532 	if ((tree_int_cst_sign_bit (iv.step)
533 	     != tree_int_cst_sign_bit (niter.control.step))
534 	    || !tree_int_cst_equal (iv.step, niter.control.step))
535 	  continue;
536 
537 	/* Find a loop PHI node that defines guard_iv directly,
538 	   or create one doing that.  */
539 	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
540 	if (!phi)
541 	  continue;
542 	gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
543 	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
544 						 loop_preheader_edge (loop1));
545 	enum tree_code guard_code = gimple_cond_code (guard_stmt);
546 
547 	/* Loop splitting is implemented by versioning the loop, placing
548 	   the new loop after the old loop, make the first loop iterate
549 	   as long as the conditional stays true (or false) and let the
550 	   second (new) loop handle the rest of the iterations.
551 
552 	   First we need to determine if the condition will start being true
553 	   or false in the first loop.  */
554 	bool initial_true;
555 	switch (guard_code)
556 	  {
557 	    case LT_EXPR:
558 	    case LE_EXPR:
559 	      initial_true = !tree_int_cst_sign_bit (iv.step);
560 	      break;
561 	    case GT_EXPR:
562 	    case GE_EXPR:
563 	      initial_true = tree_int_cst_sign_bit (iv.step);
564 	      break;
565 	    default:
566 	      gcc_unreachable ();
567 	  }
568 
569 	/* Build a condition that will skip the first loop when the
570 	   guard condition won't ever be true (or false).  */
571 	gimple_seq stmts2;
572 	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
573 	if (stmts2)
574 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
575 					    stmts2);
576 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
577 	if (!initial_true)
578 	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
579 
580 	/* Now version the loop, placing loop2 after loop1 connecting
581 	   them, and fix up SSA form for that.  */
582 	initialize_original_copy_tables ();
583 	basic_block cond_bb;
584 
585 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
586 					   profile_probability::always (),
587 					   profile_probability::always (),
588 					   profile_probability::always (),
589 					   profile_probability::always (),
590 					   true);
591 	gcc_assert (loop2);
592 	update_ssa (TODO_update_ssa);
593 
594 	edge new_e = connect_loops (loop1, loop2);
595 	connect_loop_phis (loop1, loop2, new_e);
596 
597 	/* The iterations of the second loop is now already
598 	   exactly those that the first loop didn't do, but the
599 	   iteration space of the first loop is still the original one.
600 	   Compute the new bound for the guarding IV and patch the
601 	   loop exit to use it instead of original IV and bound.  */
602 	gimple_seq stmts = NULL;
603 	tree newend = compute_new_first_bound (&stmts, &niter, border,
604 					       guard_code, guard_init);
605 	if (stmts)
606 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
607 					    stmts);
608 	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
609 	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
610 
611 	/* Finally patch out the two copies of the condition to be always
612 	   true/false (or opposite).  */
613 	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
614 	gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
615 	if (!initial_true)
616 	  std::swap (force_true, force_false);
617 	gimple_cond_make_true (force_true);
618 	gimple_cond_make_false (force_false);
619 	update_stmt (force_true);
620 	update_stmt (force_false);
621 
622 	free_original_copy_tables ();
623 
624 	/* We destroyed LCSSA form above.  Eventually we might be able
625 	   to fix it on the fly, for now simply punt and use the helper.  */
626 	rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
627 
628 	changed = true;
629 	if (dump_file && (dump_flags & TDF_DETAILS))
630 	  fprintf (dump_file, ";; Loop split.\n");
631 
632 	/* Only deal with the first opportunity.  */
633 	break;
634       }
635 
636   free (bbs);
637   return changed;
638 }
639 
640 /* Another transformation of loops like:
641 
642    for (i = INIT (); CHECK (i); i = NEXT ())
643      {
644        if (expr (a_1, a_2, ..., a_n))  // expr is pure
645          a_j = ...;  // change at least one a_j
646        else
647          S;          // not change any a_j
648      }
649 
650    into:
651 
652    for (i = INIT (); CHECK (i); i = NEXT ())
653      {
654        if (expr (a_1, a_2, ..., a_n))
655          a_j = ...;
656        else
657          {
658            S;
659            i = NEXT ();
660            break;
661          }
662      }
663 
664    for (; CHECK (i); i = NEXT ())
665      {
666        S;
667      }
668 
669    */
670 
671 /* Data structure to hold temporary information during loop split upon
672    semi-invariant conditional statement.  */
673 class split_info {
674 public:
675   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
676   basic_block *bbs;
677 
678   /* All memory store/clobber statements in a loop.  */
679   auto_vec<gimple *> memory_stores;
680 
681   /* Whether above memory stores vector has been filled.  */
682   int need_init;
683 
684   /* Control dependencies of basic blocks in a loop.  */
685   auto_vec<hash_set<basic_block> *> control_deps;
686 
split_info()687   split_info () : bbs (NULL),  need_init (true) { }
688 
~split_info()689   ~split_info ()
690     {
691       if (bbs)
692 	free (bbs);
693 
694       for (unsigned i = 0; i < control_deps.length (); i++)
695 	delete control_deps[i];
696     }
697 };
698 
699 /* Find all statements with memory-write effect in LOOP, including memory
700    store and non-pure function call, and keep those in a vector.  This work
701    is only done one time, for the vector should be constant during analysis
702    stage of semi-invariant condition.  */
703 
704 static void
find_vdef_in_loop(struct loop * loop)705 find_vdef_in_loop (struct loop *loop)
706 {
707   split_info *info = (split_info *) loop->aux;
708   gphi *vphi = get_virtual_phi (loop->header);
709 
710   /* Indicate memory store vector has been filled.  */
711   info->need_init = false;
712 
713   /* If loop contains memory operation, there must be a virtual PHI node in
714      loop header basic block.  */
715   if (vphi == NULL)
716     return;
717 
718   /* All virtual SSA names inside the loop are connected to be a cyclic
719      graph via virtual PHI nodes.  The virtual PHI node in loop header just
720      links the first and the last virtual SSA names, by using the last as
721      PHI operand to define the first.  */
722   const edge latch = loop_latch_edge (loop);
723   const tree first = gimple_phi_result (vphi);
724   const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
725 
726   /* The virtual SSA cyclic graph might consist of only one SSA name, who
727      is defined by itself.
728 
729        .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
730 
731      This means the loop contains only memory loads, so we can skip it.  */
732   if (first == last)
733     return;
734 
735   auto_vec<gimple *> other_stores;
736   auto_vec<tree> worklist;
737   auto_bitmap visited;
738 
739   bitmap_set_bit (visited, SSA_NAME_VERSION (first));
740   bitmap_set_bit (visited, SSA_NAME_VERSION (last));
741   worklist.safe_push (last);
742 
743   do
744     {
745       tree vuse = worklist.pop ();
746       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
747 
748       /* We mark the first and last SSA names as visited at the beginning,
749 	 and reversely start the process from the last SSA name towards the
750 	 first, which ensures that this do-while will not touch SSA names
751 	 defined outside the loop.  */
752       gcc_assert (gimple_bb (stmt)
753 		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
754 
755       if (gimple_code (stmt) == GIMPLE_PHI)
756 	{
757 	  gphi *phi = as_a <gphi *> (stmt);
758 
759 	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
760 	    {
761 	      tree arg = gimple_phi_arg_def (stmt, i);
762 
763 	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
764 		worklist.safe_push (arg);
765 	    }
766 	}
767       else
768 	{
769 	  tree prev = gimple_vuse (stmt);
770 
771 	  /* Non-pure call statement is conservatively assumed to impact all
772 	     memory locations.  So place call statements ahead of other memory
773 	     stores in the vector with an idea of using them as shortcut
774 	     terminators to memory alias analysis.  */
775 	  if (gimple_code (stmt) == GIMPLE_CALL)
776 	    info->memory_stores.safe_push (stmt);
777 	  else
778 	    other_stores.safe_push (stmt);
779 
780 	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
781 	    worklist.safe_push (prev);
782 	}
783     } while (!worklist.is_empty ());
784 
785     info->memory_stores.safe_splice (other_stores);
786 }
787 
788 /* Two basic blocks have equivalent control dependency if one dominates to
789    the other, and it is post-dominated by the latter.  Given a basic block
790    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
791    is a constraint that BB does not post-dominate loop header of LOOP, this
792    means BB is control-dependent on at least one basic block in LOOP.  */
793 
794 static basic_block
get_control_equiv_head_block(struct loop * loop,basic_block bb)795 get_control_equiv_head_block (struct loop *loop, basic_block bb)
796 {
797   while (!bb->aux)
798     {
799       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
800 
801       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
802 
803       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
804 	break;
805 
806       bb = dom_bb;
807     }
808   return bb;
809 }
810 
811 /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
812    dependent on.  */
813 
814 static hash_set<basic_block> *
find_control_dep_blocks(struct loop * loop,basic_block bb)815 find_control_dep_blocks (struct loop *loop, basic_block bb)
816 {
817   /* BB has same control dependency as loop header, then it is not control-
818      dependent on any basic block in LOOP.  */
819   if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
820     return NULL;
821 
822   basic_block equiv_head = get_control_equiv_head_block (loop, bb);
823 
824   if (equiv_head->aux)
825     {
826       /* There is a basic block containing control dependency equivalent
827 	 to BB.  No need to recompute that, and also set this information
828 	 to other equivalent basic blocks.  */
829       for (; bb != equiv_head;
830 	   bb = get_immediate_dominator (CDI_DOMINATORS, bb))
831 	bb->aux = equiv_head->aux;
832       return (hash_set<basic_block> *) equiv_head->aux;
833     }
834 
835   /* A basic block X is control-dependent on another Y iff there exists
836      a path from X to Y, in which every basic block other than X and Y
837      is post-dominated by Y, but X is not post-dominated by Y.
838 
839      According to this rule, traverse basic blocks in the loop backwards
840      starting from BB, if a basic block is post-dominated by BB, extend
841      current post-dominating path to this block, otherwise it is another
842      one that BB is control-dependent on.  */
843 
844   auto_vec<basic_block> pdom_worklist;
845   hash_set<basic_block> pdom_visited;
846   hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
847 
848   pdom_worklist.safe_push (equiv_head);
849 
850   do
851     {
852       basic_block pdom_bb = pdom_worklist.pop ();
853       edge_iterator ei;
854       edge e;
855 
856       if (pdom_visited.add (pdom_bb))
857 	continue;
858 
859       FOR_EACH_EDGE (e, ei, pdom_bb->preds)
860 	{
861 	  basic_block pred_bb = e->src;
862 
863 	  if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
864 	    {
865 	      dep_bbs->add (pred_bb);
866 	      continue;
867 	    }
868 
869 	  pred_bb = get_control_equiv_head_block (loop, pred_bb);
870 
871 	  if (pdom_visited.contains (pred_bb))
872 	    continue;
873 
874 	  if (!pred_bb->aux)
875 	    {
876 	      pdom_worklist.safe_push (pred_bb);
877 	      continue;
878 	    }
879 
880 	  /* If control dependency of basic block is available, fast extend
881 	     post-dominating path using the information instead of advancing
882 	     forward step-by-step.  */
883 	  hash_set<basic_block> *pred_dep_bbs
884 			= (hash_set<basic_block> *) pred_bb->aux;
885 
886 	  for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
887 	       iter != pred_dep_bbs->end (); ++iter)
888 	    {
889 	      basic_block pred_dep_bb = *iter;
890 
891 	      /* Basic blocks can either be in control dependency of BB, or
892 		 must be post-dominated by BB, if so, extend the path from
893 		 these basic blocks.  */
894 	      if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
895 		dep_bbs->add (pred_dep_bb);
896 	      else if (!pdom_visited.contains (pred_dep_bb))
897 		pdom_worklist.safe_push (pred_dep_bb);
898 	    }
899 	}
900     } while (!pdom_worklist.is_empty ());
901 
902   /* Record computed control dependencies in loop so that we can reach them
903      when reclaiming resources.  */
904   ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
905 
906   /* Associate control dependence with related equivalent basic blocks.  */
907   for (equiv_head->aux = dep_bbs; bb != equiv_head;
908        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
909     bb->aux = dep_bbs;
910 
911   return dep_bbs;
912 }
913 
914 /* Forward declaration */
915 
916 static bool
917 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
918 			 const_basic_block skip_head,
919 			 hash_map<gimple *, bool> &stmt_stat);
920 
921 /* Given STMT, memory load or pure call statement, check whether it is impacted
922    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
923    trace is composed of SKIP_HEAD and those basic block dominated by it, always
924    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
925    NULL, all basic blocks of LOOP are checked.  */
926 
927 static bool
vuse_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)928 vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
929 		       const_basic_block skip_head)
930 {
931   split_info *info = (split_info *) loop->aux;
932   tree rhs = NULL_TREE;
933   ao_ref ref;
934   gimple *store;
935   unsigned i;
936 
937   /* Collect memory store/clobber statements if haven't done that.  */
938   if (info->need_init)
939     find_vdef_in_loop (loop);
940 
941   if (is_gimple_assign (stmt))
942     rhs = gimple_assign_rhs1 (stmt);
943 
944   ao_ref_init (&ref, rhs);
945 
946   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
947     {
948       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
949       if (skip_head
950 	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
951 	continue;
952 
953       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
954 	return false;
955     }
956 
957   return true;
958 }
959 
960 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
961    certain iteration of LOOP, check whether an SSA name (NAME) remains
962    unchanged in next iteration.  We call this characteristic semi-
963    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
964    blocks and control flows in the loop will be considered.  Semi-invariant
965    state of checked statement is cached in hash map STMT_STAT to avoid
966    redundant computation in possible following re-check.  */
967 
968 static inline bool
ssa_semi_invariant_p(struct loop * loop,tree name,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)969 ssa_semi_invariant_p (struct loop *loop, tree name,
970 		      const_basic_block skip_head,
971 		      hash_map<gimple *, bool> &stmt_stat)
972 {
973   gimple *def = SSA_NAME_DEF_STMT (name);
974   const_basic_block def_bb = gimple_bb (def);
975 
976   /* An SSA name defined outside loop is definitely semi-invariant.  */
977   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
978     return true;
979 
980   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
981     return false;
982 
983   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
984 }
985 
986 /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
987    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
988    are excluded from LOOP.  */
989 
990 static bool
loop_iter_phi_semi_invariant_p(struct loop * loop,gphi * loop_phi,const_basic_block skip_head)991 loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
992 				const_basic_block skip_head)
993 {
994   const_edge latch = loop_latch_edge (loop);
995   tree name = gimple_phi_result (loop_phi);
996   tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
997 
998   gcc_checking_assert (from);
999 
1000   /* Loop iteration PHI node locates in loop header, and it has two source
1001      operands, one is an initial value coming from outside the loop, the other
1002      is a value through latch of the loop, which is derived in last iteration,
1003      we call the latter latch value.  From the PHI node to definition of latch
1004      value, if excluding branch trace starting from SKIP_HEAD, except copy-
1005      assignment or likewise, there is no other kind of value redefinition, SSA
1006      name defined by the PHI node is semi-invariant.
1007 
1008                          loop entry
1009                               |     .--- latch ---.
1010                               |     |             |
1011                               v     v             |
1012                   x_1 = PHI <x_0,  x_3>           |
1013                            |                      |
1014                            v                      |
1015               .------- if (cond) -------.         |
1016               |                         |         |
1017               |                     [ SKIP ]      |
1018               |                         |         |
1019               |                     x_2 = ...     |
1020               |                         |         |
1021               '---- T ---->.<---- F ----'         |
1022                            |                      |
1023                            v                      |
1024                   x_3 = PHI <x_1, x_2>            |
1025                            |                      |
1026                            '----------------------'
1027 
1028      Suppose in certain iteration, execution flow in above graph goes through
1029      true branch, which means that one source value to define x_3 in false
1030      branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1031      iterations is defined by x_3, we know that x_1 will never changed if COND
1032      always chooses true branch from then on.  */
1033 
1034   while (from != name)
1035     {
1036       /* A new value comes from a CONSTANT.  */
1037       if (TREE_CODE (from) != SSA_NAME)
1038 	return false;
1039 
1040       gimple *stmt = SSA_NAME_DEF_STMT (from);
1041       const_basic_block bb = gimple_bb (stmt);
1042 
1043       /* A new value comes from outside the loop.  */
1044       if (!bb || !flow_bb_inside_loop_p (loop, bb))
1045 	return false;
1046 
1047       from = NULL_TREE;
1048 
1049       if (gimple_code (stmt) == GIMPLE_PHI)
1050 	{
1051 	  gphi *phi = as_a <gphi *> (stmt);
1052 
1053 	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1054 	    {
1055 	      if (skip_head)
1056 		{
1057 		  const_edge e = gimple_phi_arg_edge (phi, i);
1058 
1059 		  /* Don't consider redefinitions in excluded basic blocks.  */
1060 		  if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1061 		    continue;
1062 		}
1063 
1064 	      tree arg = gimple_phi_arg_def (phi, i);
1065 
1066 	      if (!from)
1067 		from = arg;
1068 	      else if (!operand_equal_p (from, arg, 0))
1069 		/* There are more than one source operands that provide
1070 		   different values to the SSA name, it is variant.  */
1071 		return false;
1072 	    }
1073 	}
1074       else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1075 	{
1076 	  /* For simple value copy, check its rhs instead.  */
1077 	  if (gimple_assign_ssa_name_copy_p (stmt))
1078 	    from = gimple_assign_rhs1 (stmt);
1079 	}
1080 
1081       /* Any other kind of definition is deemed to introduce a new value
1082 	 to the SSA name.  */
1083       if (!from)
1084 	return false;
1085     }
1086   return true;
1087 }
1088 
1089 /* Check whether conditional predicates that BB is control-dependent on, are
1090    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1091    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1092    in hash map STMT_STAT.  */
1093 
1094 static bool
control_dep_semi_invariant_p(struct loop * loop,basic_block bb,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1095 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1096 			      const_basic_block skip_head,
1097 			      hash_map<gimple *, bool> &stmt_stat)
1098 {
1099   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1100 
1101   if (!dep_bbs)
1102     return true;
1103 
1104   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1105        iter != dep_bbs->end (); ++iter)
1106     {
1107       gimple *last = last_stmt (*iter);
1108 
1109       if (!last)
1110 	return false;
1111 
1112       /* Only check condition predicates.  */
1113       if (gimple_code (last) != GIMPLE_COND
1114 	  && gimple_code (last) != GIMPLE_SWITCH)
1115 	return false;
1116 
1117       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1118 	return false;
1119     }
1120 
1121   return true;
1122 }
1123 
1124 /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1125    semi-invariant, consequently, all its defined values are semi-invariant.
1126    Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1127    Semi-invariant state of checked statement is cached in hash map
1128    STMT_STAT.  */
1129 
1130 static bool
stmt_semi_invariant_p_1(struct loop * loop,gimple * stmt,const_basic_block skip_head,hash_map<gimple *,bool> & stmt_stat)1131 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1132 			 const_basic_block skip_head,
1133 			 hash_map<gimple *, bool> &stmt_stat)
1134 {
1135   bool existed;
1136   bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1137 
1138   if (existed)
1139     return invar;
1140 
1141   /* A statement might depend on itself, which is treated as variant.  So set
1142      state of statement under check to be variant to ensure that.  */
1143   invar = false;
1144 
1145   if (gimple_code (stmt) == GIMPLE_PHI)
1146     {
1147       gphi *phi = as_a <gphi *> (stmt);
1148 
1149       if (gimple_bb (stmt) == loop->header)
1150 	{
1151 	  /* If the entry value is subject to abnormal coalescing
1152 	     avoid the transform since we're going to duplicate the
1153 	     loop header and thus likely introduce overlapping life-ranges
1154 	     between the PHI def and the entry on the path when the
1155 	     first loop is skipped.  */
1156 	  tree entry_def
1157 	    = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1158 	  if (TREE_CODE (entry_def) == SSA_NAME
1159 	      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1160 	    return false;
1161 	  invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1162 	  return invar;
1163 	}
1164 
1165       /* For a loop PHI node that does not locate in loop header, it is semi-
1166 	 invariant only if two conditions are met.  The first is its source
1167 	 values are derived from CONSTANT (including loop-invariant value), or
1168 	 from SSA name defined by semi-invariant loop iteration PHI node.  The
1169 	 second is its source incoming edges are control-dependent on semi-
1170 	 invariant conditional predicates.  */
1171       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1172 	{
1173 	  const_edge e = gimple_phi_arg_edge (phi, i);
1174 	  tree arg = gimple_phi_arg_def (phi, i);
1175 
1176 	  if (TREE_CODE (arg) == SSA_NAME)
1177 	    {
1178 	      if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1179 		return false;
1180 
1181 	      /* If source value is defined in location from where the source
1182 		 edge comes in, no need to check control dependency again
1183 		 since this has been done in above SSA name check stage.  */
1184 	      if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1185 		continue;
1186 	    }
1187 
1188 	  if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1189 					     stmt_stat))
1190 	    return false;
1191 	}
1192     }
1193   else
1194     {
1195       ssa_op_iter iter;
1196       tree use;
1197 
1198       /* Volatile memory load or return of normal (non-const/non-pure) call
1199 	 should not be treated as constant in each iteration of loop.  */
1200       if (gimple_has_side_effects (stmt))
1201 	return false;
1202 
1203       /* Check if any memory store may kill memory load at this place.  */
1204       if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1205 	return false;
1206 
1207       /* Although operand of a statement might be SSA name, CONSTANT or
1208 	 VARDECL, here we only need to check SSA name operands.  This is
1209 	 because check on VARDECL operands, which involve memory loads,
1210 	 must have been done prior to invocation of this function in
1211 	 vuse_semi_invariant_p.  */
1212       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1213 	if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1214 	  return false;
1215     }
1216 
1217   if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1218 				     stmt_stat))
1219     return false;
1220 
1221   /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1222      to new insertion, and thus invar may point to invalid memory.  */
1223   stmt_stat.put (stmt, true);
1224   return true;
1225 }
1226 
1227 /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1228    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1229 
1230 static bool
stmt_semi_invariant_p(struct loop * loop,gimple * stmt,const_basic_block skip_head)1231 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1232 		       const_basic_block skip_head)
1233 {
1234   hash_map<gimple *, bool> stmt_stat;
1235   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1236 }
1237 
1238 /* Determine when conditional statement never transfers execution to one of its
1239    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1240    and those basic blocks dominated by BRANCH_BB.  */
1241 
1242 static bool
branch_removable_p(basic_block branch_bb)1243 branch_removable_p (basic_block branch_bb)
1244 {
1245   edge_iterator ei;
1246   edge e;
1247 
1248   if (single_pred_p (branch_bb))
1249     return true;
1250 
1251   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1252     {
1253       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1254 	continue;
1255 
1256       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1257 	continue;
1258 
1259        /* The branch can be reached from opposite branch, or from some
1260 	  statement not dominated by the conditional statement.  */
1261       return false;
1262     }
1263 
1264   return true;
1265 }
1266 
1267 /* Find out which branch of a conditional statement (COND) is invariant in the
1268    execution context of LOOP.  That is: once the branch is selected in certain
1269    iteration of the loop, any operand that contributes to computation of the
1270    conditional statement remains unchanged in all following iterations.  */
1271 
1272 static edge
get_cond_invariant_branch(struct loop * loop,gcond * cond)1273 get_cond_invariant_branch (struct loop *loop, gcond *cond)
1274 {
1275   basic_block cond_bb = gimple_bb (cond);
1276   basic_block targ_bb[2];
1277   bool invar[2];
1278   unsigned invar_checks = 0;
1279 
1280   for (unsigned i = 0; i < 2; i++)
1281     {
1282       targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1283 
1284       /* One branch directs to loop exit, no need to perform loop split upon
1285 	 this conditional statement.  Firstly, it is trivial if the exit branch
1286 	 is semi-invariant, for the statement is just to break loop.  Secondly,
1287 	 if the opposite branch is semi-invariant, it means that the statement
1288 	 is real loop-invariant, which is covered by loop unswitch.  */
1289       if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1290 	return NULL;
1291     }
1292 
1293   for (unsigned i = 0; i < 2; i++)
1294     {
1295       invar[!i] = false;
1296 
1297       if (!branch_removable_p (targ_bb[i]))
1298 	continue;
1299 
1300       /* Given a semi-invariant branch, if its opposite branch dominates
1301 	 loop latch, it and its following trace will only be executed in
1302 	 final iteration of loop, namely it is not part of repeated body
1303 	 of the loop.  Similar to the above case that the branch is loop
1304 	 exit, no need to split loop.  */
1305       if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1306 	continue;
1307 
1308       invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1309       invar_checks++;
1310     }
1311 
1312   /* With both branches being invariant (handled by loop unswitch) or
1313      variant is not what we want.  */
1314   if (invar[0] ^ !invar[1])
1315     return NULL;
1316 
1317   /* Found a real loop-invariant condition, do nothing.  */
1318   if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1319     return NULL;
1320 
1321   return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1322 }
1323 
1324 /* Calculate increased code size measured by estimated insn number if applying
1325    loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
1326 
1327 static int
compute_added_num_insns(struct loop * loop,const_edge branch_edge)1328 compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1329 {
1330   basic_block cond_bb = branch_edge->src;
1331   unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1332   basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1333   basic_block *bbs = ((split_info *) loop->aux)->bbs;
1334   int num = 0;
1335 
1336   for (unsigned i = 0; i < loop->num_nodes; i++)
1337     {
1338       /* Do no count basic blocks only in opposite branch.  */
1339       if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1340 	continue;
1341 
1342       num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1343     }
1344 
1345   /* It is unnecessary to evaluate expression of the conditional statement
1346      in new loop that contains only invariant branch.  This expression should
1347      be constant value (either true or false).  Exclude code size of insns
1348      that contribute to computation of the expression.  */
1349 
1350   auto_vec<gimple *> worklist;
1351   hash_set<gimple *> removed;
1352   gimple *stmt = last_stmt (cond_bb);
1353 
1354   worklist.safe_push (stmt);
1355   removed.add (stmt);
1356   num -= estimate_num_insns (stmt, &eni_size_weights);
1357 
1358   do
1359     {
1360       ssa_op_iter opnd_iter;
1361       use_operand_p opnd_p;
1362 
1363       stmt = worklist.pop ();
1364       FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1365 	{
1366 	  tree opnd = USE_FROM_PTR (opnd_p);
1367 
1368 	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1369 	    continue;
1370 
1371 	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1372 	  use_operand_p use_p;
1373 	  imm_use_iterator use_iter;
1374 
1375 	  if (removed.contains (opnd_stmt)
1376 	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1377 	    continue;
1378 
1379 	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1380 	    {
1381 	      gimple *use_stmt = USE_STMT (use_p);
1382 
1383 	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1384 		{
1385 		  opnd_stmt = NULL;
1386 		  break;
1387 		}
1388 	    }
1389 
1390 	  if (opnd_stmt)
1391 	    {
1392 	      worklist.safe_push (opnd_stmt);
1393 	      removed.add (opnd_stmt);
1394 	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1395 	    }
1396 	}
1397     } while (!worklist.is_empty ());
1398 
1399   gcc_assert (num >= 0);
1400   return num;
1401 }
1402 
1403 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1404    and check whether it is eligible and profitable to perform loop split upon
1405    this branch in LOOP.  */
1406 
1407 static edge
get_cond_branch_to_split_loop(struct loop * loop,gcond * cond)1408 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1409 {
1410   edge invar_branch = get_cond_invariant_branch (loop, cond);
1411   if (!invar_branch)
1412     return NULL;
1413 
1414   /* When accurate profile information is available, and execution
1415      frequency of the branch is too low, just let it go.  */
1416   profile_probability prob = invar_branch->probability;
1417   if (prob.reliable_p ())
1418     {
1419       int thres = param_min_loop_cond_split_prob;
1420 
1421       if (prob < profile_probability::always ().apply_scale (thres, 100))
1422 	return NULL;
1423     }
1424 
1425   /* Add a threshold for increased code size to disable loop split.  */
1426   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1427     return NULL;
1428 
1429   return invar_branch;
1430 }
1431 
1432 /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1433    conditional statement, perform loop split transformation illustrated
1434    as the following graph.
1435 
1436                .-------T------ if (true) ------F------.
1437                |                    .---------------. |
1438                |                    |               | |
1439                v                    |               v v
1440           pre-header                |            pre-header
1441                | .------------.     |                 | .------------.
1442                | |            |     |                 | |            |
1443                | v            |     |                 | v            |
1444              header           |     |               header           |
1445                |              |     |                 |              |
1446       .--- if (cond) ---.     |     |        .--- if (true) ---.     |
1447       |                 |     |     |        |                 |     |
1448   invariant             |     |     |    invariant             |     |
1449       |                 |     |     |        |                 |     |
1450       '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
1451                |              |    /                  |              |
1452              stmts            |   /                 stmts            |
1453                |              F  T                    |              |
1454               / \             | /                    / \             |
1455      .-------*   *      [ if (cond) ]       .-------*   *            |
1456      |           |            |             |           |            |
1457      |         latch          |             |         latch          |
1458      |           |            |             |           |            |
1459      |           '------------'             |           '------------'
1460      '------------------------. .-----------'
1461              loop1            | |                   loop2
1462                               v v
1463                              exits
1464 
1465    In the graph, loop1 represents the part derived from original one, and
1466    loop2 is duplicated using loop_version (), which corresponds to the part
1467    of original one being splitted out.  In original latch edge of loop1, we
1468    insert a new conditional statement duplicated from the semi-invariant cond,
1469    and one of its branch goes back to loop1 header as a latch edge, and the
1470    other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
1471    we abandon the variant branch of the conditional statement by setting a
1472    constant bool condition, based on which branch is semi-invariant.  */
1473 
1474 static bool
do_split_loop_on_cond(struct loop * loop1,edge invar_branch)1475 do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1476 {
1477   basic_block cond_bb = invar_branch->src;
1478   bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1479   gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
1480 
1481   gcc_assert (cond_bb->loop_father == loop1);
1482 
1483   if (dump_enabled_p ())
1484     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1485 		     "loop split on semi-invariant condition at %s branch\n",
1486 		     true_invar ? "true" : "false");
1487 
1488   initialize_original_copy_tables ();
1489 
1490   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1491 				     profile_probability::always (),
1492 				     profile_probability::never (),
1493 				     profile_probability::always (),
1494 				     profile_probability::always (),
1495 				     true);
1496   if (!loop2)
1497     {
1498       free_original_copy_tables ();
1499       return false;
1500     }
1501 
1502   basic_block cond_bb_copy = get_bb_copy (cond_bb);
1503   gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
1504 
1505   /* Replace the condition in loop2 with a bool constant to let PassManager
1506      remove the variant branch after current pass completes.  */
1507   if (true_invar)
1508     gimple_cond_make_true (cond_copy);
1509   else
1510     gimple_cond_make_false (cond_copy);
1511 
1512   update_stmt (cond_copy);
1513 
1514   /* Insert a new conditional statement on latch edge of loop1, its condition
1515      is duplicated from the semi-invariant.  This statement acts as a switch
1516      to transfer execution from loop1 to loop2, when loop1 enters into
1517      invariant state.  */
1518   basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1519   basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1520   gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1521 					  gimple_cond_lhs (cond),
1522 					  gimple_cond_rhs (cond),
1523 					  NULL_TREE, NULL_TREE);
1524 
1525   gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1526   gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1527 
1528   edge to_loop1 = single_succ_edge (break_bb);
1529   edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1530 
1531   to_loop1->flags &= ~EDGE_FALLTHRU;
1532   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1533   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1534 
1535   update_ssa (TODO_update_ssa);
1536 
1537   /* Due to introduction of a control flow edge from loop1 latch to loop2
1538      pre-header, we should update PHIs in loop2 to reflect this connection
1539      between loop1 and loop2.  */
1540   connect_loop_phis (loop1, loop2, to_loop2);
1541 
1542   free_original_copy_tables ();
1543 
1544   rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
1545 
1546   return true;
1547 }
1548 
1549 /* Traverse all conditional statements in LOOP, to find out a good candidate
1550    upon which we can do loop split.  */
1551 
1552 static bool
split_loop_on_cond(struct loop * loop)1553 split_loop_on_cond (struct loop *loop)
1554 {
1555   split_info *info = new split_info ();
1556   basic_block *bbs = info->bbs = get_loop_body (loop);
1557   bool do_split = false;
1558 
1559   /* Allocate an area to keep temporary info, and associate its address
1560      with loop aux field.  */
1561   loop->aux = info;
1562 
1563   for (unsigned i = 0; i < loop->num_nodes; i++)
1564     bbs[i]->aux = NULL;
1565 
1566   for (unsigned i = 0; i < loop->num_nodes; i++)
1567     {
1568       basic_block bb = bbs[i];
1569 
1570       /* We only consider conditional statement, which be executed at most once
1571 	 in each iteration of the loop.  So skip statements in inner loops.  */
1572       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1573 	continue;
1574 
1575       /* Actually this check is not a must constraint.  With it, we can ensure
1576 	 conditional statement will always be executed in each iteration.  */
1577       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1578 	continue;
1579 
1580       gimple *last = last_stmt (bb);
1581 
1582       if (!last || gimple_code (last) != GIMPLE_COND)
1583 	continue;
1584 
1585       gcond *cond = as_a <gcond *> (last);
1586       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1587 
1588       if (branch_edge)
1589 	{
1590 	  do_split_loop_on_cond (loop, branch_edge);
1591 	  do_split = true;
1592 	  break;
1593 	}
1594     }
1595 
1596   delete info;
1597   loop->aux = NULL;
1598 
1599   return do_split;
1600 }
1601 
1602 /* Main entry point.  Perform loop splitting on all suitable loops.  */
1603 
1604 static unsigned int
tree_ssa_split_loops(void)1605 tree_ssa_split_loops (void)
1606 {
1607   class loop *loop;
1608   bool changed = false;
1609 
1610   gcc_assert (scev_initialized_p ());
1611 
1612   calculate_dominance_info (CDI_POST_DOMINATORS);
1613 
1614   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
1615     loop->aux = NULL;
1616 
1617   /* Go through all loops starting from innermost.  */
1618   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1619     {
1620       if (loop->aux)
1621 	{
1622 	  /* If any of our inner loops was split, don't split us,
1623 	     and mark our containing loop as having had splits as well.  */
1624 	  loop_outer (loop)->aux = loop;
1625 	  continue;
1626 	}
1627 
1628       if (optimize_loop_for_size_p (loop))
1629 	continue;
1630 
1631       if (split_loop (loop) || split_loop_on_cond (loop))
1632 	{
1633 	  /* Mark our containing loop as having had some split inner loops.  */
1634 	  loop_outer (loop)->aux = loop;
1635 	  changed = true;
1636 	}
1637     }
1638 
1639   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
1640     loop->aux = NULL;
1641 
1642   clear_aux_for_blocks ();
1643 
1644   free_dominance_info (CDI_POST_DOMINATORS);
1645 
1646   if (changed)
1647     return TODO_cleanup_cfg;
1648   return 0;
1649 }
1650 
1651 /* Loop splitting pass.  */
1652 
1653 namespace {
1654 
1655 const pass_data pass_data_loop_split =
1656 {
1657   GIMPLE_PASS, /* type */
1658   "lsplit", /* name */
1659   OPTGROUP_LOOP, /* optinfo_flags */
1660   TV_LOOP_SPLIT, /* tv_id */
1661   PROP_cfg, /* properties_required */
1662   0, /* properties_provided */
1663   0, /* properties_destroyed */
1664   0, /* todo_flags_start */
1665   0, /* todo_flags_finish */
1666 };
1667 
1668 class pass_loop_split : public gimple_opt_pass
1669 {
1670 public:
pass_loop_split(gcc::context * ctxt)1671   pass_loop_split (gcc::context *ctxt)
1672     : gimple_opt_pass (pass_data_loop_split, ctxt)
1673   {}
1674 
1675   /* opt_pass methods: */
gate(function *)1676   virtual bool gate (function *) { return flag_split_loops != 0; }
1677   virtual unsigned int execute (function *);
1678 
1679 }; // class pass_loop_split
1680 
1681 unsigned int
execute(function * fun)1682 pass_loop_split::execute (function *fun)
1683 {
1684   if (number_of_loops (fun) <= 1)
1685     return 0;
1686 
1687   return tree_ssa_split_loops ();
1688 }
1689 
1690 } // anon namespace
1691 
1692 gimple_opt_pass *
make_pass_loop_split(gcc::context * ctxt)1693 make_pass_loop_split (gcc::context *ctxt)
1694 {
1695   return new pass_loop_split (ctxt);
1696 }
1697