/* Code sinking for trees
   Copyright (C) 2001-2020 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "cfgloop.h"

/* TODO:
   1. Sinking store only using scalar promotion (IE without moving the RHS):

   *q = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
     y = *q;


   should become
   sinktemp = p;
   p = p + 1;
   if (something)
     *q = <not p>;
   else
   {
     *q = sinktemp;
     y = *q;
   }
   Store copy propagation will take care of the store elimination above.


   2. Sinking using Partial Dead Code Elimination.  */


static struct
{
  /* The number of statements sunk down the flowgraph by code sinking.  */
  int sunk;

} sink_stats;

/* Given a PHI and one of its arguments (DEF), find the edge for
   that argument and return its source block.  If the argument occurs
   twice in the PHI node, return NULL.  */

static basic_block
find_bb_for_arg (gphi *phi, tree def)
{
  size_t i;
  bool foundone = false;
  basic_block result = NULL;
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    if (PHI_ARG_DEF (phi, i) == def)
      {
	if (foundone)
	  return NULL;
	foundone = true;
	result = gimple_phi_arg_edge (phi, i)->src;
      }
  return result;
}

/* When the first immediate use is in a statement, return true if all
   immediate uses of the definition DEF_P are in the same statement.
   We could also handle the case where the first immediate use is in a
   PHI node, and all the other uses are in PHIs in the same basic block,
   but this requires some expensive checking later (you have to make
   sure no def/vdef in the statement occurs for multiple edges in the
   various PHI nodes it's used in, so that you only have one place you
   can sink it to).  */
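/* For example, given

     a_2 = a_1 + 1;
     x_3 = a_2 * a_2;

   both non-debug uses of a_2 occur in the single statement computing
   x_3, so this function returns true for a_2's definition.  */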

static bool
all_immediate_uses_same_place (def_operand_p def_p)
{
  tree var = DEF_FROM_PTR (def_p);
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  gimple *firstuse = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;
      if (firstuse == NULL)
	firstuse = USE_STMT (use_p);
      else
	if (firstuse != USE_STMT (use_p))
	  return false;
    }

  return true;
}

/* Find the nearest common dominator of all of the immediate uses of the
   definition DEF_P.  Set *DEBUG_STMTS to true if any of those uses is
   in a debug statement.  */

static basic_block
nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
{
  tree var = DEF_FROM_PTR (def_p);
  auto_bitmap blocks;
  basic_block commondom;
  unsigned int j;
  bitmap_iterator bi;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
    {
      gimple *usestmt = USE_STMT (use_p);
      basic_block useblock;

      if (gphi *phi = dyn_cast <gphi *> (usestmt))
	{
	  int idx = PHI_ARG_INDEX_FROM_USE (use_p);

	  useblock = gimple_phi_arg_edge (phi, idx)->src;
	}
      else if (is_gimple_debug (usestmt))
	{
	  *debug_stmts = true;
	  continue;
	}
      else
	{
	  useblock = gimple_bb (usestmt);
	}

      /* Short circuit.  Nothing dominates the entry block.  */
      if (useblock == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	return NULL;

      bitmap_set_bit (blocks, useblock->index);
    }
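  /* Fold the collected use blocks into a single nearest common
     dominator, seeded with the first block in the bitmap.  */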
  commondom = BASIC_BLOCK_FOR_FN (cfun, bitmap_first_set_bit (blocks));
  EXECUTE_IF_SET_IN_BITMAP (blocks, 0, j, bi)
    commondom = nearest_common_dominator (CDI_DOMINATORS, commondom,
					  BASIC_BLOCK_FOR_FN (cfun, j));
  return commondom;
}

/* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
   tree, return the best basic block between them (inclusive) to place
   statements.

   We want the most control dependent block in the shallowest loop nest.

   If the resulting block is in a shallower loop nest, then use it.  Else
   only use the resulting block if it has significantly lower execution
   frequency than EARLY_BB to avoid gratuitous statement movement.  We
   consider statements with VOPS more desirable to move.

   This pass would obviously benefit from PDO as it utilizes block
   frequencies.  It would also benefit from recomputing frequencies
   if profile data is not available since frequencies often get out
   of sync with reality.  */
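
/* For example, with the default value of --param sink-frequency-threshold
   (75), a statement without virtual operands is only moved to a block at
   the same loop depth when that block is expected to execute less than
   75% as often as EARLY_BB.  */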

static basic_block
select_best_block (basic_block early_bb,
		   basic_block late_bb,
		   gimple *stmt)
{
  basic_block best_bb = late_bb;
  basic_block temp_bb = late_bb;
  int threshold;

  while (temp_bb != early_bb)
    {
      /* If we've moved into a lower loop nest, then that becomes
	 our best block.  */
      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
	best_bb = temp_bb;

      /* Walk up the dominator tree, hopefully we'll find a shallower
	 loop nest.  */
      temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
    }

  /* If we found a shallower loop nest, then we always consider that
     a win.  This will always give us the most control dependent block
     within that loop nest.  */
  if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
    return best_bb;

  /* Get the sinking threshold.  If the statement to be moved has memory
     operands, then increase the threshold by 7% as those are even more
     profitable to avoid, clamping at 100%.  */
  threshold = param_sink_frequency_threshold;
  if (gimple_vuse (stmt) || gimple_vdef (stmt))
    {
      threshold += 7;
      if (threshold > 100)
	threshold = 100;
    }

  /* If BEST_BB is at the same nesting level, then require it to have
     significantly lower execution frequency to avoid gratuitous movement.  */
  if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
      /* If the result of the comparison is unknown, prefer EARLY_BB.
	 Thus use !(...>=..) rather than (...<...).  When known, the test
	 is equivalent to best_bb->count * 100 < early_bb->count * threshold,
	 i.e. BEST_BB must execute less than THRESHOLD percent as often
	 as EARLY_BB.  */
      && !(best_bb->count.apply_scale (100, 1)
	   >= early_bb->count.apply_scale (threshold, 1)))
    return best_bb;

  /* No better block found, so return EARLY_BB, which happens to be the
     statement's original block.  */
  return early_bb;
}

/* Given a statement (STMT) and the basic block it is currently in (FROMBB),
   determine the location to sink the statement to, if any.
   Return true if there is such a location; in that case, TOGSI points to
   the statement before which STMT should be moved.  *ZERO_USES_P is set
   to true if STMT's definition has no uses at all.  */

static bool
statement_sink_location (gimple *stmt, basic_block frombb,
			 gimple_stmt_iterator *togsi, bool *zero_uses_p)
{
  gimple *use;
  use_operand_p one_use = NULL_USE_OPERAND_P;
  basic_block sinkbb;
  use_operand_p use_p;
  def_operand_p def_p;
  ssa_op_iter iter;
  imm_use_iterator imm_iter;

  *zero_uses_p = false;

  /* We can only sink assignments and non-looping const/pure calls.  */
  int cf;
  if (!is_gimple_assign (stmt)
      && (!is_gimple_call (stmt)
	  || !((cf = gimple_call_flags (stmt)) & (ECF_CONST|ECF_PURE))
	  || (cf & ECF_LOOPING_CONST_OR_PURE)))
    return false;

  /* We can only sink stmts with a single definition.  */
  def_p = single_ssa_def_operand (stmt, SSA_OP_ALL_DEFS);
  if (def_p == NULL_DEF_OPERAND_P)
    return false;

  /* There are a few classes of things we can't or don't move, some because we
     don't have code to handle it, some because it's not profitable and some
     because it's not legal.

     We can't sink things that may be global stores, at least not without
     calculating a lot more information, because we may cause it to no longer
     be seen by an external routine that needs it depending on where it gets
     moved to.

     We can't sink statements that end basic blocks without splitting the
     incoming edge for the sink location to place it there.

     We can't sink statements that have volatile operands.

     We don't want to sink dead code, so anything with 0 immediate uses is not
     sunk.

     Don't sink BLKmode assignments if current function has any local explicit
     register variables, as BLKmode assignments may involve memcpy or memset
     calls or, on some targets, inline expansion thereof that sometimes need
     to use specific hard registers.  */
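  /* Note that gimple_has_side_effects returns true for statements with
     volatile operands, so the check below also covers those.  */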
  if (stmt_ends_bb_p (stmt)
      || gimple_has_side_effects (stmt)
      || (cfun->has_local_explicit_reg_vars
	  && TYPE_MODE (TREE_TYPE (gimple_get_lhs (stmt))) == BLKmode))
    return false;

  /* Return if there are no immediate uses of this stmt.  */
  if (has_zero_uses (DEF_FROM_PTR (def_p)))
    {
      *zero_uses_p = true;
      return false;
    }

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (DEF_FROM_PTR (def_p)))
    return false;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    {
      tree use = USE_FROM_PTR (use_p);
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use))
	return false;
    }

  use = NULL;

  /* If STMT is a store, the one and only use needs to be the VOP-merging
     PHI node.  */
  if (virtual_operand_p (DEF_FROM_PTR (def_p)))
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  gimple *use_stmt = USE_STMT (use_p);

	  /* A killing definition is not a use.  */
	  if ((gimple_has_lhs (use_stmt)
	       && operand_equal_p (gimple_get_lhs (stmt),
				   gimple_get_lhs (use_stmt), 0))
	      || stmt_kills_ref_p (use_stmt, gimple_get_lhs (stmt)))
	    {
	      /* If USE_STMT is or might be a nop assignment then it acts
	         as a use as well as a definition.  */
	      if (stmt != use_stmt
		  && ref_maybe_used_by_stmt_p (use_stmt,
					       gimple_get_lhs (stmt)))
		return false;
	      continue;
	    }

	  if (gimple_code (use_stmt) != GIMPLE_PHI)
	    return false;

	  if (use
	      && use != use_stmt)
	    return false;

	  use = use_stmt;
	}
      if (!use)
	return false;
    }
  /* If all the immediate uses are not in the same place, find the nearest
     common dominator of all the immediate uses.  For PHI nodes, we have to
     find the nearest common dominator of all of the predecessor blocks, since
     that is where insertion would have to take place.  */
  else if (gimple_vuse (stmt)
	   || !all_immediate_uses_same_place (def_p))
    {
      bool debug_stmts = false;
      basic_block commondom = nearest_common_dominator_of_uses (def_p,
								&debug_stmts);

      if (commondom == frombb)
	return false;

      /* If this is a load then do not sink past any stores.
	 ???  This is overly simple but cheap.  We basically look
	 for an existing load with the same VUSE in the path to one
	 of the sink candidate blocks and we adjust commondom to the
	 nearest such block dominating commondom.  */
      if (gimple_vuse (stmt))
	{
	  /* Do not sink loads from hard registers.  */
	  if (gimple_assign_single_p (stmt)
	      && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL
	      && DECL_HARD_REGISTER (gimple_assign_rhs1 (stmt)))
	    return false;

	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  basic_block found = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt))
	    {
	      gimple *use_stmt = USE_STMT (use_p);
	      basic_block bb = gimple_bb (use_stmt);
	      /* For PHI nodes the block we know something about is the
		 incoming block with the use.  */
	      if (gimple_code (use_stmt) == GIMPLE_PHI)
		bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
	      /* Adjusting commondom to any dominator of commondom is
	         always safe.  */
	      bb = nearest_common_dominator (CDI_DOMINATORS, bb, commondom);
	      if (!found)
		found = bb;
	      else if (dominated_by_p (CDI_DOMINATORS, bb, found))
		found = bb;
	      /* If we can't improve, stop.  */
	      if (found == commondom)
		break;
	    }
	  commondom = found;
	  if (commondom == frombb)
	    return false;
	}

      /* Our common dominator has to be dominated by frombb in order to be a
	 trivially safe place to put this statement, since it has multiple
	 uses.  */
      if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
	return false;

      commondom = select_best_block (frombb, commondom, stmt);

      if (commondom == frombb)
	return false;

      *togsi = gsi_after_labels (commondom);

      return true;
    }
  else
    {
      FOR_EACH_IMM_USE_FAST (one_use, imm_iter, DEF_FROM_PTR (def_p))
	{
	  if (is_gimple_debug (USE_STMT (one_use)))
	    continue;
	  break;
	}
      use = USE_STMT (one_use);

      if (gimple_code (use) != GIMPLE_PHI)
	{
	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);

	  if (sinkbb == frombb)
	    return false;

	  if (sinkbb == gimple_bb (use))
	    *togsi = gsi_for_stmt (use);
	  else
	    *togsi = gsi_after_labels (sinkbb);

	  return true;
	}
    }

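  /* At this point USE is known to be a PHI: either the VOP-merging PHI
     for a store, or the single non-debug use found above.  Sink towards
     the block feeding the corresponding PHI argument.  */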
  sinkbb = find_bb_for_arg (as_a <gphi *> (use), DEF_FROM_PTR (def_p));

  /* This can happen if there are multiple uses in a PHI.  */
  if (!sinkbb)
    return false;

  sinkbb = select_best_block (frombb, sinkbb, stmt);
  if (!sinkbb || sinkbb == frombb)
    return false;

  /* If the latch block is empty, don't make it non-empty by sinking
     something into it.  */
  if (sinkbb == frombb->loop_father->latch
      && empty_block_p (sinkbb))
    return false;

  *togsi = gsi_after_labels (sinkbb);

  return true;
}

/* Perform code sinking on BB.  */

static void
sink_code_in_bb (basic_block bb)
{
  basic_block son;
  gimple_stmt_iterator gsi;
  edge_iterator ei;
  edge e;
  bool last = true;

  /* If this block doesn't dominate anything, there can't be any place to sink
     the statements to.  */
  if (first_dom_son (CDI_DOMINATORS, bb) == NULL)
    goto earlyout;

  /* We can't move things across abnormal edges, so don't try.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_ABNORMAL)
      goto earlyout;

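  /* Walk the statements backwards so that a statement's uses are visited
     (and possibly sunk or removed) before the statement defining them is
     considered.  */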
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_stmt_iterator togsi;
      bool zero_uses_p;

      if (!statement_sink_location (stmt, bb, &togsi, &zero_uses_p))
	{
	  gimple_stmt_iterator saved = gsi;
	  if (!gsi_end_p (gsi))
	    gsi_prev (&gsi);
	  /* If we face a dead stmt, remove it as it possibly blocks
	     sinking of uses.  */
	  if (zero_uses_p
	      && ! gimple_vdef (stmt))
	    {
	      gsi_remove (&saved, true);
	      release_defs (stmt);
	    }
	  else
	    last = false;
	  continue;
	}
      if (dump_file)
	{
	  fprintf (dump_file, "Sinking ");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
	  fprintf (dump_file, " from bb %d to bb %d\n",
		   bb->index, (gsi_bb (togsi))->index);
	}

      /* Update virtual operands of statements in the path we
         do not sink to.  */
      if (gimple_vdef (stmt))
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *vuse_stmt;

	  FOR_EACH_IMM_USE_STMT (vuse_stmt, iter, gimple_vdef (stmt))
	    if (gimple_code (vuse_stmt) != GIMPLE_PHI)
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, gimple_vuse (stmt));
	}

      /* If this is the end of the basic block, we need to insert at the end
         of the basic block.  */
      if (gsi_end_p (togsi))
	gsi_move_to_bb_end (&gsi, gsi_bb (togsi));
      else
	gsi_move_before (&gsi, &togsi);

      sink_stats.sunk++;

      /* If we've just removed the last statement of the BB, the
	 gsi_end_p() test below would fail, but gsi_prev() would have
	 succeeded, and we want it to succeed.  So we keep track of
	 whether we're at the last statement and pick up the new last
	 statement.  */
      if (last)
	{
	  gsi = gsi_last_bb (bb);
	  continue;
	}

      last = false;
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);

    }
 earlyout:
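  /* Recurse into the children of BB in the post-dominator tree.
     Together with the fake exit edges added for infinite loops, this
     walk started from the exit block visits every block in the
     function.  */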
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    {
      sink_code_in_bb (son);
    }
}

/* Perform code sinking.
   This moves code down the flowgraph when we know it would be
   profitable to do so, or it wouldn't increase the number of
   executions of the statement.

   IE given

   a_1 = b + c;
   if (<something>)
   {
   }
   else
   {
     foo (&b, &c);
     a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   we'll transform this into:

   if (<something>)
   {
      a_1 = b + c;
   }
   else
   {
      foo (&b, &c);
      a_5 = b + c;
   }
   a_6 = PHI (a_5, a_1);
   USE a_6.

   Note that this reduces the number of computations of a = b + c to 1
   when we take the else edge, instead of 2.
*/
namespace {

const pass_data pass_data_sink_code =
{
  GIMPLE_PASS, /* type */
  "sink", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINK, /* tv_id */
  /* PROP_no_crit_edges is ensured by running split_edges_for_insertion in
     pass_sink_code::execute ().  */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_sink_code : public gimple_opt_pass
{
public:
  pass_sink_code (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_sink_code, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_sink != 0; }
  virtual unsigned int execute (function *);

}; // class pass_sink_code

unsigned int
pass_sink_code::execute (function *fun)
{
  loop_optimizer_init (LOOPS_NORMAL);
  split_edges_for_insertion ();
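  /* Add fake exit edges for infinite loops so the post-dominator walk
     below can reach every block from the exit block.  */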
  connect_infinite_loops_to_exit ();
  memset (&sink_stats, 0, sizeof (sink_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun));
  statistics_counter_event (fun, "Sunk statements", sink_stats.sunk);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_sink_code (gcc::context *ctxt)
{
  return new pass_sink_code (ctxt);
}