/* Code sinking for trees
   Copyright (C) 2001, 2002, 2003, 2004, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-dump.h"
#include "timevar.h"
#include "fibheap.h"
#include "hashtab.h"
#include "tree-iterator.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "flags.h"
#include "bitmap.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "params.h"

/* TODO:
   1. Sinking a store using scalar promotion only (i.e. without moving
      the RHS):

   *q = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
     y = *q;


   should become
   sinktemp = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
   {
     *q = sinktemp;
     y = *q;
   }
   Store copy propagation will take care of the store elimination above.


   2. Sinking using Partial Dead Code Elimination (see the sketch
      below).  */
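
/* As a rough sketch of TODO item 2 (not implemented by this pass):
   a statement may be only partially dead, i.e. its result is
   overwritten on some but not all paths to a join:

     a_1 = b + c;                 if (cond)
     if (cond)                      a_2 = 0;
       a_2 = 0;           =>     else
     a_3 = PHI <a_2, a_1>          a_1 = b + c;
                                  a_3 = PHI <a_2, a_1>

   PDCE-style sinking would move a_1 = b + c onto the edge where its
   value is actually live, so b + c is no longer computed on the path
   that immediately overwrites it.  */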


static struct
{
  /* The number of statements sunk down the flowgraph by code sinking.  */
  int sunk;

} sink_stats;


/* Given a PHI, and one of its arguments (DEF), find the source block of
   the edge which corresponds to that argument and return it.  If the
   argument occurs twice in the PHI node, we return NULL.  */
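
/* For example (an illustrative sketch), given

     # x_4 = PHI <x_1(bb3), x_2(bb4), x_1(bb5)>

   asking for x_2 yields bb4, while asking for x_1 yields NULL because
   x_1 appears on two incoming edges.  */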

static basic_block
find_bb_for_arg (gimple phi, tree def)
{
  size_t i;
  bool foundone = false;
  basic_block result = NULL;
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    if (PHI_ARG_DEF (phi, i) == def)
      {
	if (foundone)
	  return NULL;
	foundone = true;
	result = gimple_phi_arg_edge (phi, i)->src;
      }
  return result;
}

/* When the first immediate use is in a statement, then return true if all
   immediate uses of the definitions in STMT are in the same statement.
   We could also handle the case where the first immediate use is in a phi
   node, and all the other uses are in phis in the same basic block, but
   this requires some expensive checking later (you have to make sure no
   def/vdef in the statement occurs for multiple edges in the various phi
   nodes it's used in, so that you only have one place you can sink it
   to).  */
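
/* For instance (a sketch), if STMT is

     a_1 = b_2 + c_3;

   and the only non-debug uses of a_1 are the two operands of

     d_4 = a_1 * a_1;

   then every immediate use occurs in the single statement defining d_4,
   so the function below returns true.  */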

static bool
all_immediate_uses_same_place (gimple stmt)
{
  gimple firstuse = NULL;
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree var;

  FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_ALL_DEFS)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
        {
	  if (is_gimple_debug (USE_STMT (use_p)))
	    continue;
	  if (firstuse == NULL)
	    firstuse = USE_STMT (use_p);
	  else
	    if (firstuse != USE_STMT (use_p))
	      return false;
	}
    }

  return true;
}

/* Some global stores don't necessarily have VDEFs of global variables,
   but we still must avoid moving them around.  */

bool
is_hidden_global_store (gimple stmt)
{
  /* Check virtual definitions.  If we get here, the only virtual
     definitions we should see are those generated by assignment or call
     statements.  */
  if (gimple_vdef (stmt))
    {
      tree lhs;

      gcc_assert (is_gimple_assign (stmt) || is_gimple_call (stmt));

      /* Note that we must not check the individual virtual operands
	 here.  In particular, if this is an aliased store, we could
	 end up with something like the following (SSA notation
	 redacted for brevity):

	 	foo (int *p, int i)
		{
		  int x;
		  p_1 = (i_2 > 3) ? &x : p;

		  # x_4 = VDEF <x_3>
		  *p_1 = 5;

		  return 2;
		}

	 Notice that the store to '*p_1' should be preserved; if we
	 were to check the virtual definitions in that store, we would
	 not mark it needed.  This is because 'x' is not a global
	 variable.

	 Therefore, we check the base address of the LHS.  If the
	 address is a pointer, we check whether dereferencing it may
	 alias global memory.  Otherwise, we check if the base variable
	 is a global.  */
      lhs = gimple_get_lhs (stmt);

      if (REFERENCE_CLASS_P (lhs))
	lhs = get_base_address (lhs);

      if (lhs == NULL_TREE)
	{
	  /* If LHS is NULL, it means that we couldn't get the base
	     address of the reference.  In which case, we should not
	     move this store.  */
	  return true;
	}
      else if (DECL_P (lhs))
	{
	  /* If the store is to a global symbol, we need to keep it.  */
	  if (is_global_var (lhs))
	    return true;

	}
      else if (INDIRECT_REF_P (lhs)
	       || TREE_CODE (lhs) == MEM_REF
	       || TREE_CODE (lhs) == TARGET_MEM_REF)
	return ptr_deref_may_alias_global_p (TREE_OPERAND (lhs, 0));
      else if (CONSTANT_CLASS_P (lhs))
	return true;
      else
	gcc_unreachable ();
    }

  return false;
}

/* Find the nearest common dominator of all of the immediate uses of the
   definitions in STMT.  Set *DEBUG_STMTS if any of those uses is a debug
   statement.  Return NULL if one of the use blocks is the entry block,
   since nothing dominates it.  */
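
/* E.g. (a sketch): if the non-debug uses of STMT's definition sit in
   blocks 5 and 7, and block 3 is the nearest block dominating both,
   the function below returns block 3; sinking STMT there keeps it
   dominating all of its uses.  */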

static basic_block
nearest_common_dominator_of_uses (gimple stmt, bool *debug_stmts)
{
  bitmap blocks = BITMAP_ALLOC (NULL);
  basic_block commondom;
  unsigned int j;
  bitmap_iterator bi;
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree var;

  bitmap_clear (blocks);
  FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_ALL_DEFS)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
        {
	  gimple usestmt = USE_STMT (use_p);
	  basic_block useblock;

	  if (gimple_code (usestmt) == GIMPLE_PHI)
	    {
	      int idx = PHI_ARG_INDEX_FROM_USE (use_p);

	      useblock = gimple_phi_arg_edge (usestmt, idx)->src;
	    }
	  else if (is_gimple_debug (usestmt))
	    {
	      *debug_stmts = true;
	      continue;
	    }
	  else
	    {
	      useblock = gimple_bb (usestmt);
	    }

	  /* Short circuit.  Nothing dominates the entry block.  */
	  if (useblock == ENTRY_BLOCK_PTR)
	    {
	      BITMAP_FREE (blocks);
	      return NULL;
	    }
	  bitmap_set_bit (blocks, useblock->index);
	}
    }
  commondom = BASIC_BLOCK (bitmap_first_set_bit (blocks));
  EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi)
    commondom = nearest_common_dominator (CDI_DOMINATORS, commondom,
					  BASIC_BLOCK (j));
  BITMAP_FREE (blocks);
  return commondom;
}

/* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
   tree, return the best basic block between them (inclusive) to place
   statements.

   We want the most control dependent block in the shallowest loop nest.

   If the resulting block is in a shallower loop nest, then use it.  Else
   only use the resulting block if it has significantly lower execution
   frequency than EARLY_BB to avoid gratuitous statement movement.  We
   consider statements with VOPS more desirable to move.

   This pass would obviously benefit from PDO (profile-directed
   optimization) as it utilizes block frequencies.  It would also benefit
   from recomputing frequencies if profile data is not available since
   frequencies often get out of sync with reality.  */
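
/* For example (a sketch, assuming the sink-frequency-threshold
   parameter is at its documented default of 75): a candidate block at
   the same loop depth as EARLY_BB is only chosen when its frequency is
   below 75% of EARLY_BB's frequency, or below 82% for statements with
   virtual operands, per the 7-point bump applied below.  */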

static basic_block
select_best_block (basic_block early_bb,
		   basic_block late_bb,
		   gimple stmt)
{
  basic_block best_bb = late_bb;
  basic_block temp_bb = late_bb;
  int threshold;

  while (temp_bb != early_bb)
    {
      /* If we've moved into a lower loop nest, then that becomes
	 our best block.  */
      if (temp_bb->loop_depth < best_bb->loop_depth)
	best_bb = temp_bb;

      /* Walk up the dominator tree, hopefully we'll find a shallower
	 loop nest.  */
      temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
    }

  /* If we found a shallower loop nest, then we always consider that
     a win.  This will always give us the most control dependent block
     within that loop nest.  */
  if (best_bb->loop_depth < early_bb->loop_depth)
    return best_bb;

  /* Get the sinking threshold.  If the statement to be moved has memory
     operands, then increase the threshold by 7% as moving such statements
     is even more profitable, clamping at 100%.  */
  threshold = PARAM_VALUE (PARAM_SINK_FREQUENCY_THRESHOLD);
  if (gimple_vuse (stmt) || gimple_vdef (stmt))
    {
      threshold += 7;
      if (threshold > 100)
	threshold = 100;
    }

  /* If BEST_BB is at the same nesting level, then require it to have
     significantly lower execution frequency to avoid gratuitous
     movement.  */
  if (best_bb->loop_depth == early_bb->loop_depth
      && best_bb->frequency < (early_bb->frequency * threshold / 100.0))
    return best_bb;

  /* No better block found, so return EARLY_BB, which happens to be the
     statement's original block.  */
  return early_bb;
}

/* Given a statement (STMT) and the basic block it is currently in (FROMBB),
   determine the location to sink the statement to, if any.
   Return true if there is such a location; in that case, TOGSI points to
   the statement before which STMT should be moved.  */
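
/* For instance (an illustrative sketch): if STMT is a_1 = b_2 + c_3 in
   bb2, and the only non-debug use of a_1 is the statement d_4 = a_1 * 2
   in bb4, this returns true with TOGSI pointing at d_4 = a_1 * 2,
   provided select_best_block prefers bb4 over bb2.  */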

static bool
statement_sink_location (gimple stmt, basic_block frombb,
			 gimple_stmt_iterator *togsi)
{
  gimple use;
  use_operand_p one_use = NULL_USE_OPERAND_P;
  basic_block sinkbb;
  use_operand_p use_p;
  def_operand_p def_p;
  ssa_op_iter iter;
  imm_use_iterator imm_iter;

  /* We can only sink assignments.  */
  if (!is_gimple_assign (stmt))
    return false;

  /* We can only sink stmts with a single definition.  */
  def_p = single_ssa_def_operand (stmt, SSA_OP_ALL_DEFS);
  if (def_p == NULL_DEF_OPERAND_P)
    return false;

  /* There is nothing to do if there are no immediate uses of this stmt.  */
  if (has_zero_uses (DEF_FROM_PTR (def_p)))
    return false;

  /* There are a few classes of things we can't or don't move, some because
     we don't have code to handle them, some because it's not profitable,
     and some because it's not legal.

     We can't sink things that may be global stores, at least not without
     calculating a lot more information, because, depending on where they
     get moved to, we may cause them to no longer be seen by an external
     routine that needs them.

     We don't want to sink loads from memory.

     We can't sink statements that end basic blocks without splitting the
     incoming edge for the sink location to place it there.

     We can't sink statements that have volatile operands.

     We don't want to sink dead code, so anything with 0 immediate uses is
     not sunk.

     Don't sink BLKmode assignments if current function has any local
     explicit register variables, as BLKmode assignments may involve memcpy
     or memset calls or, on some targets, inline expansion thereof that
     sometimes need to use specific hard registers.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_side_effects (stmt)
      || gimple_has_volatile_ops (stmt)
      || (gimple_vuse (stmt) && !gimple_vdef (stmt))
      || (cfun->has_local_explicit_reg_vars
	  && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode))
    return false;

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (DEF_FROM_PTR (def_p)))
    return false;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    {
      tree use_tree = USE_FROM_PTR (use_p);
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_tree))
	return false;
    }

  use = NULL;

  /* If stmt is a store, the one and only use needs to be the VOP
     merging PHI node.  */
  if (gimple_vdef (stmt))
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  gimple use_stmt = USE_STMT (use_p);

	  /* A killing definition is not a use.  */
	  if (gimple_assign_single_p (use_stmt)
	      && gimple_vdef (use_stmt)
	      && operand_equal_p (gimple_assign_lhs (stmt),
				  gimple_assign_lhs (use_stmt), 0))
	    continue;

	  if (gimple_code (use_stmt) != GIMPLE_PHI)
	    return false;

	  if (use
	      && use != use_stmt)
	    return false;

	  use = use_stmt;
	}
      if (!use)
	return false;
    }
  /* If all the immediate uses are not in the same place, find the nearest
     common dominator of all the immediate uses.  For PHI nodes, we have to
     find the nearest common dominator of all of the predecessor blocks, since
     that is where insertion would have to take place.  */
  else if (!all_immediate_uses_same_place (stmt))
    {
      bool debug_stmts = false;
      basic_block commondom = nearest_common_dominator_of_uses (stmt,
								&debug_stmts);

      if (commondom == frombb)
	return false;

      /* Our common dominator has to be dominated by frombb in order to be a
	 trivially safe place to put this statement, since it has multiple
	 uses.  */
      if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
	return false;

      commondom = select_best_block (frombb, commondom, stmt);

      if (commondom == frombb)
	return false;

      *togsi = gsi_after_labels (commondom);

      return true;
    }
  else
    {
      FOR_EACH_IMM_USE_FAST (one_use, imm_iter, DEF_FROM_PTR (def_p))
	{
	  if (is_gimple_debug (USE_STMT (one_use)))
	    continue;
	  break;
	}
      use = USE_STMT (one_use);

      if (gimple_code (use) != GIMPLE_PHI)
	{
	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);

	  if (sinkbb == frombb)
	    return false;

	  /* If the chosen block is not the one containing the use, sink
	     after its labels rather than immediately before the use.  */
	  if (sinkbb == gimple_bb (use))
	    *togsi = gsi_for_stmt (use);
	  else
	    *togsi = gsi_after_labels (sinkbb);

	  return true;
	}
    }

  sinkbb = find_bb_for_arg (use, DEF_FROM_PTR (def_p));

  /* This can happen if there are multiple uses in a PHI.  */
  if (!sinkbb)
    return false;

  sinkbb = select_best_block (frombb, sinkbb, stmt);
  if (!sinkbb || sinkbb == frombb)
    return false;

  /* If the latch block is empty, don't make it non-empty by sinking
     something into it.  */
  if (sinkbb == frombb->loop_father->latch
      && empty_block_p (sinkbb))
    return false;

  *togsi = gsi_after_labels (sinkbb);

  return true;
}

/* Perform code sinking on BB and recursively on its children in the
   post-dominator tree.  */

static void
sink_code_in_bb (basic_block bb)
{
  basic_block son;
  gimple_stmt_iterator gsi;
  edge_iterator ei;
  edge e;
  bool last = true;

  /* If this block doesn't dominate anything, there can't be any place to sink
     the statements to.  */
  if (first_dom_son (CDI_DOMINATORS, bb) == NULL)
    goto earlyout;

  /* We can't move things across abnormal edges, so don't try.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_ABNORMAL)
      goto earlyout;

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_stmt_iterator togsi;

      if (!statement_sink_location (stmt, bb, &togsi))
	{
	  if (!gsi_end_p (gsi))
	    gsi_prev (&gsi);
	  last = false;
	  continue;
	}
      if (dump_file)
	{
	  fprintf (dump_file, "Sinking ");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
	  fprintf (dump_file, " from bb %d to bb %d\n",
		   bb->index, (gsi_bb (togsi))->index);
	}

      /* Update virtual operands of statements in the path we
         do not sink to.  */
      if (gimple_vdef (stmt))
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple vuse_stmt;

	  FOR_EACH_IMM_USE_STMT (vuse_stmt, iter, gimple_vdef (stmt))
	    if (gimple_code (vuse_stmt) != GIMPLE_PHI)
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, gimple_vuse (stmt));
	}

      /* If TOGSI is at the end of its basic block, we need to append
         STMT at the end of that block.  */
      if (gsi_end_p (togsi))
	gsi_move_to_bb_end (&gsi, gsi_bb (togsi));
      else
	gsi_move_before (&gsi, &togsi);

      sink_stats.sunk++;

      /* If we've just removed the last statement of the BB, the
	 gsi_end_p() test below would fail, but gsi_prev() would have
	 succeeded, and we want it to succeed.  So we keep track of
	 whether we're at the last statement and pick up the new last
	 statement.  */
      if (last)
	{
	  gsi = gsi_last_bb (bb);
	  continue;
	}

      last = false;
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);

    }
 earlyout:
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    {
      sink_code_in_bb (son);
    }
}

/* Perform code sinking.
   This moves code down the flowgraph when we know it would be
   profitable to do so, or it wouldn't increase the number of
   executions of the statement.

   I.e. given

   a_1 = b + c;
   if (<something>)
   {
   }
   else
   {
     foo (&b, &c);
     a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   we'll transform this into:

   if (<something>)
   {
      a_1 = b + c;
   }
   else
   {
      foo (&b, &c);
      a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   Note that this reduces the number of computations of b + c along the
   else path from 2 to 1.  */
static void
execute_sink_code (void)
{
  loop_optimizer_init (LOOPS_NORMAL);

  connect_infinite_loops_to_exit ();
  memset (&sink_stats, 0, sizeof (sink_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  sink_code_in_bb (EXIT_BLOCK_PTR);
  statistics_counter_event (cfun, "Sunk statements", sink_stats.sunk);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();
}

/* Gate and execute functions for the code sinking pass.  */

static unsigned int
do_sink (void)
{
  execute_sink_code ();
  return 0;
}

static bool
gate_sink (void)
{
  return flag_tree_sink != 0;
}

struct gimple_opt_pass pass_sink_code =
{
 {
  GIMPLE_PASS,
  "sink",				/* name */
  gate_sink,				/* gate */
  do_sink,				/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_SINK,				/* tv_id */
  PROP_no_crit_edges | PROP_cfg
    | PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa
    | TODO_verify_ssa
    | TODO_verify_flow
    | TODO_ggc_collect			/* todo_flags_finish */
 }
};