1 /* Loop unswitching for GNU compiler.
2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hard-reg-set.h"
26 #include "obstack.h"
27 #include "basic-block.h"
28 #include "cfgloop.h"
29 #include "params.h"
30 #include "expr.h"
31 #include "dumpfile.h"
32 
33 /* This pass moves constant conditions out of loops, duplicating the loop
34    in progress, i.e. this code:
35 
36    while (loop_cond)
37      {
38        A;
39        if (cond)
40          branch1;
41        else
42 	 branch2;
43        B;
44        if (cond)
45          branch3;
46        C;
47      }
48    where nothing inside the loop alters cond is transformed
49    into
50 
51    if (cond)
52      {
53        while (loop_cond)
54 	 {
55 	   A;
56 	   branch1;
57 	   B;
58 	   branch3;
59 	   C;
60 	 }
61      }
62    else
63      {
64        while (loop_cond)
65 	 {
66 	   A;
67 	   branch2;
68 	   B;
69 	   C;
70 	 }
71      }
72 
73   Duplicating the loop might lead to code growth exponential in number of
74   branches inside loop, so we limit the number of unswitchings performed
75   in a single loop to PARAM_MAX_UNSWITCH_LEVEL.  We only perform the
76   transformation on innermost loops, as the benefit of doing it on loops
77   containing subloops would not be very large compared to complications
78   with handling this case.  */
79 
80 static struct loop *unswitch_loop (struct loop *, basic_block, rtx, rtx);
81 static bool unswitch_single_loop (struct loop *, rtx, int);
82 static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
83 
84 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
85    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
86    in order to create a jump.  */
87 
88 rtx
compare_and_jump_seq(rtx op0,rtx op1,enum rtx_code comp,rtx label,int prob,rtx cinsn)89 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
90 		      rtx cinsn)
91 {
92   rtx seq, jump, cond;
93   enum machine_mode mode;
94 
95   mode = GET_MODE (op0);
96   if (mode == VOIDmode)
97     mode = GET_MODE (op1);
98 
99   start_sequence ();
100   if (GET_MODE_CLASS (mode) == MODE_CC)
101     {
102       /* A hack -- there seems to be no easy generic way how to make a
103 	 conditional jump from a ccmode comparison.  */
104       gcc_assert (cinsn);
105       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
106       gcc_assert (GET_CODE (cond) == comp);
107       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
108       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
109       emit_jump_insn (copy_insn (PATTERN (cinsn)));
110       jump = get_last_insn ();
111       gcc_assert (JUMP_P (jump));
112       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
113       LABEL_NUSES (JUMP_LABEL (jump))++;
114       redirect_jump (jump, label, 0);
115     }
116   else
117     {
118       gcc_assert (!cinsn);
119 
120       op0 = force_operand (op0, NULL_RTX);
121       op1 = force_operand (op1, NULL_RTX);
122       do_compare_rtx_and_jump (op0, op1, comp, 0,
123 			       mode, NULL_RTX, NULL_RTX, label, -1);
124       jump = get_last_insn ();
125       gcc_assert (JUMP_P (jump));
126       JUMP_LABEL (jump) = label;
127       LABEL_NUSES (label)++;
128     }
129   add_reg_note (jump, REG_BR_PROB, GEN_INT (prob));
130 
131   seq = get_insns ();
132   end_sequence ();
133 
134   return seq;
135 }
136 
137 /* Main entry point.  Perform loop unswitching on all suitable loops.  */
138 void
unswitch_loops(void)139 unswitch_loops (void)
140 {
141   loop_iterator li;
142   struct loop *loop;
143   bool changed = false;
144 
145   /* Go through inner loops (only original ones).  */
146 
147   FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
148     changed |= unswitch_single_loop (loop, NULL_RTX, 0);
149 
150   iv_analysis_done ();
151 
152   /* If we unswitched any loop discover new loops that are eventually
153      exposed by making irreducible regions reducible.  */
154   if (changed)
155     {
156       calculate_dominance_info (CDI_DOMINATORS);
157       fix_loop_structure (NULL);
158     }
159 }
160 
161 /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
162    basic blocks (for what it means see comments below).  In case condition
163    compares loop invariant cc mode register, return the jump in CINSN.  */
164 
165 static rtx
may_unswitch_on(basic_block bb,struct loop * loop,rtx * cinsn)166 may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
167 {
168   rtx test, at, op[2], stest;
169   struct rtx_iv iv;
170   unsigned i;
171   enum machine_mode mode;
172 
173   /* BB must end in a simple conditional jump.  */
174   if (EDGE_COUNT (bb->succs) != 2)
175     return NULL_RTX;
176   if (!any_condjump_p (BB_END (bb)))
177     return NULL_RTX;
178 
179   /* With branches inside loop.  */
180   if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
181       || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
182     return NULL_RTX;
183 
184   /* It must be executed just once each iteration (because otherwise we
185      are unable to update dominator/irreducible loop information correctly).  */
186   if (!just_once_each_iteration_p (loop, bb))
187     return NULL_RTX;
188 
189   /* Condition must be invariant.  */
190   test = get_condition (BB_END (bb), &at, true, false);
191   if (!test)
192     return NULL_RTX;
193 
194   for (i = 0; i < 2; i++)
195     {
196       op[i] = XEXP (test, i);
197 
198       if (CONSTANT_P (op[i]))
199 	continue;
200 
201       if (!iv_analyze (at, op[i], &iv))
202 	return NULL_RTX;
203       if (iv.step != const0_rtx
204 	  || iv.first_special)
205 	return NULL_RTX;
206 
207       op[i] = get_iv_value (&iv, const0_rtx);
208     }
209 
210   mode = GET_MODE (op[0]);
211   if (mode == VOIDmode)
212     mode = GET_MODE (op[1]);
213   if (GET_MODE_CLASS (mode) == MODE_CC)
214     {
215       if (at != BB_END (bb))
216 	return NULL_RTX;
217 
218       if (!rtx_equal_p (op[0], XEXP (test, 0))
219 	  || !rtx_equal_p (op[1], XEXP (test, 1)))
220 	return NULL_RTX;
221 
222       *cinsn = BB_END (bb);
223       return test;
224     }
225 
226   stest = simplify_gen_relational (GET_CODE (test), SImode,
227 				   mode, op[0], op[1]);
228   if (stest == const0_rtx
229       || stest == const_true_rtx)
230     return stest;
231 
232   return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
233 					  op[0], op[1]));
234 }
235 
236 /* Reverses CONDition; returns NULL if we cannot.  */
237 rtx
reversed_condition(rtx cond)238 reversed_condition (rtx cond)
239 {
240   enum rtx_code reversed;
241   reversed = reversed_comparison_code (cond, NULL);
242   if (reversed == UNKNOWN)
243     return NULL_RTX;
244   else
245     return gen_rtx_fmt_ee (reversed,
246 			   GET_MODE (cond), XEXP (cond, 0),
247 			   XEXP (cond, 1));
248 }
249 
250 /* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
251    unswitched on and are therefore known to be true in this LOOP.  NUM is
252    number of unswitchings done; do not allow it to grow too much, it is too
253    easy to create example on that the code would grow exponentially.
254    Returns true LOOP was unswitched.  */
255 static bool
unswitch_single_loop(struct loop * loop,rtx cond_checked,int num)256 unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
257 {
258   basic_block *bbs;
259   struct loop *nloop;
260   unsigned i;
261   rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
262   int repeat;
263   edge e;
264   HOST_WIDE_INT iterations;
265 
266   /* Do not unswitch too much.  */
267   if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
268     {
269       if (dump_file)
270 	fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
271       return false;
272     }
273 
274   /* Only unswitch innermost loops.  */
275   if (loop->inner)
276     {
277       if (dump_file)
278 	fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
279       return false;
280     }
281 
282   /* We must be able to duplicate loop body.  */
283   if (!can_duplicate_loop_p (loop))
284     {
285       if (dump_file)
286 	fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
287       return false;
288     }
289 
290   /* The loop should not be too large, to limit code growth.  */
291   if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
292     {
293       if (dump_file)
294 	fprintf (dump_file, ";; Not unswitching, loop too big\n");
295       return false;
296     }
297 
298   /* Do not unswitch in cold areas.  */
299   if (optimize_loop_for_size_p (loop))
300     {
301       if (dump_file)
302 	fprintf (dump_file, ";; Not unswitching, not hot area\n");
303       return false;
304     }
305 
306   /* Nor if the loop usually does not roll.  */
307   iterations = estimated_loop_iterations_int (loop);
308   if (iterations >= 0 && iterations <= 1)
309     {
310       if (dump_file)
311 	fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
312       return false;
313     }
314 
315   do
316     {
317       repeat = 0;
318       cinsn = NULL_RTX;
319 
320       /* Find a bb to unswitch on.  */
321       bbs = get_loop_body (loop);
322       iv_analysis_loop_init (loop);
323       for (i = 0; i < loop->num_nodes; i++)
324 	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
325 	  break;
326 
327       if (i == loop->num_nodes)
328 	{
329 	  free (bbs);
330 	  return false;
331 	}
332 
333       if (cond != const0_rtx
334 	  && cond != const_true_rtx)
335 	{
336 	  rcond = reversed_condition (cond);
337 	  if (rcond)
338 	    rcond = canon_condition (rcond);
339 
340 	  /* Check whether the result can be predicted.  */
341 	  for (acond = cond_checked; acond; acond = XEXP (acond, 1))
342 	    simplify_using_condition (XEXP (acond, 0), &cond, NULL);
343 	}
344 
345       if (cond == const_true_rtx)
346 	{
347 	  /* Remove false path.  */
348 	  e = FALLTHRU_EDGE (bbs[i]);
349 	  remove_path (e);
350 	  free (bbs);
351 	  repeat = 1;
352 	}
353       else if (cond == const0_rtx)
354 	{
355 	  /* Remove true path.  */
356 	  e = BRANCH_EDGE (bbs[i]);
357 	  remove_path (e);
358 	  free (bbs);
359 	  repeat = 1;
360 	}
361     } while (repeat);
362 
363   /* We found the condition we can unswitch on.  */
364   conds = alloc_EXPR_LIST (0, cond, cond_checked);
365   if (rcond)
366     rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
367   else
368     rconds = cond_checked;
369 
370   if (dump_file)
371     fprintf (dump_file, ";; Unswitching loop\n");
372 
373   /* Unswitch the loop on this condition.  */
374   nloop = unswitch_loop (loop, bbs[i], copy_rtx_if_shared (cond), cinsn);
375   gcc_assert (nloop);
376 
377   /* Invoke itself on modified loops.  */
378   unswitch_single_loop (nloop, rconds, num + 1);
379   unswitch_single_loop (loop, conds, num + 1);
380 
381   free_EXPR_LIST_node (conds);
382   if (rcond)
383     free_EXPR_LIST_node (rconds);
384 
385   free (bbs);
386 
387   return true;
388 }
389 
390 /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
391    unswitching of innermost loops.  UNSWITCH_ON must be executed in every
392    iteration, i.e. it must dominate LOOP latch.  COND is the condition
393    determining which loop is entered.  Returns NULL if impossible, new loop
394    otherwise.  The new loop is entered if COND is true.  If CINSN is not
395    NULL, it is the insn in that COND is compared.  */
396 
397 static struct loop *
unswitch_loop(struct loop * loop,basic_block unswitch_on,rtx cond,rtx cinsn)398 unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
399 {
400   edge entry, latch_edge, true_edge, false_edge, e;
401   basic_block switch_bb, unswitch_on_alt;
402   struct loop *nloop;
403   int irred_flag, prob;
404   rtx seq;
405 
406   /* Some sanity checking.  */
407   gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
408   gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
409   gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
410   gcc_assert (!loop->inner);
411   gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
412   gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));
413 
414   entry = loop_preheader_edge (loop);
415 
416   /* Make a copy.  */
417   irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
418   entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
419   if (!duplicate_loop_to_header_edge (loop, entry, 1,
420 			      	      NULL, NULL, NULL, 0))
421     return NULL;
422   entry->flags |= irred_flag;
423 
424   /* Record the block with condition we unswitch on.  */
425   unswitch_on_alt = get_bb_copy (unswitch_on);
426   true_edge = BRANCH_EDGE (unswitch_on_alt);
427   false_edge = FALLTHRU_EDGE (unswitch_on);
428   latch_edge = single_succ_edge (get_bb_copy (loop->latch));
429 
430   /* Create a block with the condition.  */
431   prob = true_edge->probability;
432   switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
433   seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
434 			      block_label (true_edge->dest),
435 			      prob, cinsn);
436   emit_insn_after (seq, BB_END (switch_bb));
437   e = make_edge (switch_bb, true_edge->dest, 0);
438   e->probability = prob;
439   e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
440   e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
441   e->probability = false_edge->probability;
442   e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
443 
444   if (irred_flag)
445     {
446       switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
447       EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
448       EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
449     }
450   else
451     {
452       switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
453       EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
454       EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
455     }
456 
457   /* Loopify from the copy of LOOP body, constructing the new loop.  */
458   nloop = loopify (latch_edge,
459 		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
460 		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
461 		   prob, REG_BR_PROB_BASE - prob);
462 
463   copy_loop_info (loop, nloop);
464   /* Remove branches that are now unreachable in new loops.  */
465   remove_path (true_edge);
466   remove_path (false_edge);
467 
468   /* Preserve the simple loop preheaders.  */
469   split_edge (loop_preheader_edge (loop));
470   split_edge (loop_preheader_edge (nloop));
471 
472   return nloop;
473 }
474