/* Function splitting pass
   Copyright (C) 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Jan Hubicka  <jh@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The purpose of this pass is to split function bodies to improve
   inlining.  I.e. for a function of the form:

   func (...)
     {
       if (cheap_test)
	 something_small
       else
	 something_big
     }

   Produce:

   func.part (...)
     {
	something_big
     }

   func (...)
     {
       if (cheap_test)
	 something_small
       else
	 func.part (...);
     }

   When func becomes inlinable and cheap_test is often true, inlining func,
   but not func.part, leads to a performance improvement similar to inlining
   the original func, while the code size growth is smaller.

   The pass is organized in three stages:
   1) Collect local info about basic blocks into the BB_INFO structure and
      compute the function body's estimated size and time.
   2) Via a DFS walk, find all possible basic blocks where we can split
      and choose the best one.
   3) If a split point is found, split at the specified BB by creating a clone
      and updating the function to call it.

   The decisions about what functions to split are in execute_split_functions
   and consider_split.

   There are several possible future improvements for this pass, including:

   1) Splitting to break up large functions
   2) Splitting to reduce stack frame usage
   3) Allow the split part of the function to use values computed in the
      header part.  The values need to be passed to the split function,
      perhaps via the same interface as for nested functions or as arguments.
   4) Support for simple rematerialization.  I.e. when the split part uses
      a value computed in the header from a function parameter in a very
      cheap way, we can just recompute it; see the illustration below.
   5) Support splitting of nested functions.
   6) Support non-SSA arguments.
   7) There is nothing preventing us from producing multiple parts of a single
      function when needed or splitting also the parts.  */
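
/* As an illustration of improvement 4 above (an invented example, not
   taken from any testcase): if the header computes t = x + 1 from function
   parameter x and the split part only uses t, we could recompute x + 1 in
   the split part instead of passing t.  */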

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "target.h"
#include "cgraph.h"
#include "ipa-prop.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "flags.h"
#include "timevar.h"
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-inline.h"
#include "fibheap.h"
#include "params.h"
#include "gimple-pretty-print.h"
#include "ipa-inline.h"

/* Per basic block info.  */

typedef struct
{
  unsigned int size;
  unsigned int time;
} bb_info;
DEF_VEC_O(bb_info);
DEF_VEC_ALLOC_O(bb_info,heap);

static VEC(bb_info, heap) *bb_info_vec;

/* Description of split point.  */

struct split_point
{
  /* Size of the partitions.  */
  unsigned int header_time, header_size, split_time, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become the entry point of the
     new function).  */
  basic_block entry_bb;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;

  /* True when the return value is computed in the split part and thus it
     needs to be returned.  */
  bool split_part_set_retval;
};

/* Best split point found.  */

struct split_point best_split_point;

/* Set of basic blocks that are not allowed to dominate a split point.  */

static bitmap forbidden_dominators;

static tree find_retval (basic_block return_bb);

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, check whether it is present in the bitmap passed via DATA.  */

static bool
test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  if (TREE_CODE (t) == PARM_DECL
      || (TREE_CODE (t) == VAR_DECL
	  && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || TREE_CODE (t) == LABEL_DECL)
    return bitmap_bit_p ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}

/* Dump split point CURRENT.  */

static void
dump_split_point (FILE * file, struct split_point *current)
{
  fprintf (file,
	   "Split point at BB %i\n"
	   "  header time: %i header size: %i\n"
	   "  split time: %i split size: %i\n  bbs: ",
	   current->entry_bb->index, current->header_time,
	   current->header_size, current->split_time, current->split_size);
  dump_bitmap (file, current->split_bbs);
  fprintf (file, "  SSA names to pass: ");
  dump_bitmap (file, current->ssa_names_to_pass);
}

/* Look for all BBs in the header that might lead to the split part and verify
   that they are not defining any non-SSA var used by the split part.
   Parameters are the same as for consider_split.  */

static bool
verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars,
		     basic_block return_bb)
{
  bitmap seen = BITMAP_ALLOC (NULL);
  VEC (basic_block,heap) *worklist = NULL;
  edge e;
  edge_iterator ei;
  bool ok = true;

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (e->src != ENTRY_BLOCK_PTR
	&& !bitmap_bit_p (current->split_bbs, e->src->index))
      {
        VEC_safe_push (basic_block, heap, worklist, e->src);
	bitmap_set_bit (seen, e->src->index);
      }

  while (!VEC_empty (basic_block, worklist))
    {
      gimple_stmt_iterator bsi;
      basic_block bb = VEC_pop (basic_block, worklist);

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src != ENTRY_BLOCK_PTR
	    && bitmap_set_bit (seen, e->src->index))
	  {
	    gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
					        e->src->index));
	    VEC_safe_push (basic_block, heap, worklist, e->src);
	  }
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  if (walk_stmt_load_store_addr_ops
	      (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	  if (gimple_code (stmt) == GIMPLE_LABEL
	      && test_nonssa_use (stmt, gimple_label_label (stmt),
				  non_ssa_vars))
	  {
	    ok = false;
	    goto done;
	  }
	}
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  if (walk_stmt_load_store_addr_ops
	      (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	}
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest != return_bb)
	    continue;
	  for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      gimple stmt = gsi_stmt (bsi);
	      tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	      if (!is_gimple_reg (gimple_phi_result (stmt)))
		continue;
	      if (TREE_CODE (op) != SSA_NAME
		  && test_nonssa_use (stmt, op, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	}
    }
done:
  BITMAP_FREE (seen);
  VEC_free (basic_block, heap, worklist);
  return ok;
}

279 /* If STMT is a call, check the callee against a list of forbidden
280    predicate functions.  If a match is found, look for uses of the
281    call result in condition statements that compare against zero.
282    For each such use, find the block targeted by the condition
283    statement for the nonzero result, and set the bit for this block
284    in the forbidden dominators bitmap.  The purpose of this is to avoid
285    selecting a split point where we are likely to lose the chance
286    to optimize away an unused function call.  */
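
/* A sketch of the guarded pattern (see PR49642):

     t = __builtin_constant_p (x);
     if (t != 0)
       <bb A>   ... useful only when the builtin folds to true ...
     else
       <bb B>

   Block A is recorded in FORBIDDEN_DOMINATORS; split points dominated by
   it are rejected in dominated_by_forbidden so that we do not lose the
   chance to optimize away the unused call guarded by the predicate.  */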

static void
check_forbidden_calls (gimple stmt)
{
  imm_use_iterator use_iter;
  use_operand_p use_p;
  tree lhs;

  /* At the moment, __builtin_constant_p is the only forbidden
     predicate function call (see PR49642).  */
  if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
    return;

  lhs = gimple_call_lhs (stmt);

  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
    {
      tree op1;
      basic_block use_bb, forbidden_bb;
      enum tree_code code;
      edge true_edge, false_edge;
      gimple use_stmt = USE_STMT (use_p);

      if (gimple_code (use_stmt) != GIMPLE_COND)
	continue;

      /* Assuming canonical form for GIMPLE_COND here, with constant
	 in second position.  */
      op1 = gimple_cond_rhs (use_stmt);
      code = gimple_cond_code (use_stmt);
      use_bb = gimple_bb (use_stmt);

      extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);

      /* We're only interested in comparisons that distinguish
	 unambiguously from zero.  */
      if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
	continue;

      if (code == EQ_EXPR)
	forbidden_bb = false_edge->dest;
      else
	forbidden_bb = true_edge->dest;

      bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
    }
}

/* If BB is dominated by any block in the forbidden dominators set,
   return TRUE; else FALSE.  */

static bool
dominated_by_forbidden (basic_block bb)
{
  unsigned dom_bb;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
    {
      if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
	return true;
    }

  return false;
}

/* We found a split_point CURRENT.  NON_SSA_VARS is a bitmap of all non-SSA
   variables used and RETURN_BB is the return basic block.
   See if we can split the function here.  */

static void
consider_split (struct split_point *current, bitmap non_ssa_vars,
		basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator bsi;
  unsigned int i;
  int incoming_freq = 0;
  tree retval;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (!bitmap_bit_p (current->split_bbs, e->src->index))
      incoming_freq += EDGE_FREQUENCY (e);

  /* Do not split when we would end up calling the function anyway.  */
  if (incoming_freq
      >= (ENTRY_BLOCK_PTR->frequency
	  * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: incoming frequency is too large.\n");
      return;
    }
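
  /* As an illustrative example (the parameter value below is made up):
     with --param partial-inlining-entry-probability=70, a split point
     reached on at least 70% of the executions of the function entry is
     rejected here, since the call to the split part would be emitted on
     the hot path anyway and splitting would only add call overhead.  */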

  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Refused: header empty\n");
      return;
    }

  /* Verify that PHI args on entry are either virtual or all their operands
     incoming from the header are the same.  */
  for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree val = NULL;

      if (!is_gimple_reg (gimple_phi_result (stmt)))
	continue;
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  edge e = gimple_phi_arg_edge (stmt, i);
	  if (!bitmap_bit_p (current->split_bbs, e->src->index))
	    {
	      tree edge_val = gimple_phi_arg_def (stmt, i);
	      if (val && edge_val != val)
	        {
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: entry BB has PHI with multiple variants\n");
		  return;
	        }
	      val = edge_val;
	    }
	}
    }


  /* See what arguments we will pass to the split function and compute
     the call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = DECL_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
	{
	  if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  Refused: need to pass non-ssa param values\n");
	      return;
	    }
	}
      else if (gimple_default_def (cfun, parm)
	       && bitmap_bit_p (current->ssa_names_to_pass,
				SSA_NAME_VERSION (gimple_default_def
						  (cfun, parm))))
	{
	  if (!VOID_TYPE_P (TREE_TYPE (parm)))
	    call_overhead += estimate_move_cost (TREE_TYPE (parm));
	  num_args++;
	}
    }
  if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
    call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));

  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
			? MAX_INLINE_INSNS_SINGLE
			: MAX_INLINE_INSNS_AUTO))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* FIXME: we can currently pass only SSA function parameters to the split
     function as arguments.  Once the parm_adjustment infrastructure is
     supported by cloning, we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-SSA vars used in the split region, see if they
     are used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars)
      && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split part has non-ssa uses\n");
      return;
    }

  /* If the split point is dominated by a forbidden block, reject
     the split.  */
  if (!bitmap_empty_p (forbidden_dominators)
      && dominated_by_forbidden (current->entry_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split point dominated by forbidden block\n");
      return;
    }

  /* See if the retval used by the return bb is computed by the header or
     the split part.  When it is computed by the split part, we need to
     produce a return statement in the split part and add code to the
     header to pass the value around.

     This is a bit tricky to test:
       1) When there is no return_bb or no return value, we always pass
          the value around.
       2) Invariants are always computed by the caller.
       3) For SSA we need to look whether the defining statement is in the
	  header or in the split part.
       4) For non-SSA we need to look where the var is computed.  */
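  /* For instance, for "return 0;" the constant stays in the header's
     return code (case 2 above), while for "return x;" with x defined
     inside the split region, the split part must compute the value and
     return it (case 3).  */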
  retval = find_retval (return_bb);
  if (!retval)
    current->split_part_set_retval = true;
  else if (is_gimple_min_invariant (retval))
    current->split_part_set_retval = false;
  /* A special case is a value returned by reference: we record it as if
     it were a non-SSA set to the result_decl.  */
  else if (TREE_CODE (retval) == SSA_NAME
	   && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
	   && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    current->split_part_set_retval
       = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
  else if (TREE_CODE (retval) == SSA_NAME)
    current->split_part_set_retval
      = (!SSA_NAME_IS_DEFAULT_DEF (retval)
	 && (bitmap_bit_p (current->split_bbs,
			  gimple_bb (SSA_NAME_DEF_STMT (retval))->index)
	     || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb));
  else if (TREE_CODE (retval) == PARM_DECL)
    current->split_part_set_retval = false;
  else if (TREE_CODE (retval) == VAR_DECL
	   || TREE_CODE (retval) == RESULT_DECL)
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
  else
    current->split_part_set_retval = true;

  /* split_function fixes up at most one non-virtual PHI node in return_bb,
     for the return value.  If there are other PHIs, give up.  */
  if (return_bb != EXIT_BLOCK_PTR)
    {
      gimple_stmt_iterator psi;

      for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
	if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi)))
	    && !(retval
		 && current->split_part_set_retval
		 && TREE_CODE (retval) == SSA_NAME
		 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
		 && SSA_NAME_DEF_STMT (retval) == gsi_stmt (psi)))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "  Refused: return bb has extra PHIs\n");
	    return;
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment we choose the split point with the lowest frequency and
     that leaves the smallest header.
     In the future we might reconsider this heuristic.  */
  if (!best_split_point.split_bbs
      || best_split_point.entry_bb->frequency > current->entry_bb->frequency
      || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
	  && best_split_point.split_size < current->split_size))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
	{
	  BITMAP_FREE (best_split_point.ssa_names_to_pass);
	  BITMAP_FREE (best_split_point.split_bbs);
	}
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
		   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}

/* Return the basic block containing the RETURN statement.  We allow basic
   blocks of the form:
   <retval> = tmp_var;
   return <retval>
   but return_bb cannot be more complex than this.
   If nothing is found, return EXIT_BLOCK_PTR.

   When there are multiple RETURN statements, choose the one with a return
   value, since that one is more likely shared by multiple code paths.

   The return BB is special, because for function splitting it is the only
   basic block that is duplicated in between the header and the split part
   of the function.

   TODO: We might support multiple return blocks.  */

static basic_block
find_return_bb (void)
{
  edge e;
  basic_block return_bb = EXIT_BLOCK_PTR;
  gimple_stmt_iterator bsi;
  bool found_return = false;
  tree retval = NULL_TREE;

  if (!single_pred_p (EXIT_BLOCK_PTR))
    return return_bb;

  e = single_pred_edge (EXIT_BLOCK_PTR);
  for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      if (gimple_code (stmt) == GIMPLE_LABEL
	  || is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	;
      else if (gimple_code (stmt) == GIMPLE_ASSIGN
	       && found_return
	       && gimple_assign_single_p (stmt)
	       && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
				     current_function_decl)
		   || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
	       && retval == gimple_assign_lhs (stmt))
	;
      else if (gimple_code (stmt) == GIMPLE_RETURN)
	{
	  found_return = true;
	  retval = gimple_return_retval (stmt);
	}
      else
	break;
    }
  if (gsi_end_p (bsi) && found_return)
    return_bb = e->src;

  return return_bb;
}

/* Given return basic block RETURN_BB, see where the return value is really
   stored.  */
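/* For example, given the allowed return_bb form

     <retval> = tmp_var;
     return <retval>;

   this returns TMP_VAR, i.e. the value actually stored, rather than
   <retval> itself.  */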
static tree
find_retval (basic_block return_bb)
{
  gimple_stmt_iterator bsi;
  for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
      return gimple_return_retval (gsi_stmt (bsi));
    else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
	     && !gimple_clobber_p (gsi_stmt (bsi)))
      return gimple_assign_rhs1 (gsi_stmt (bsi));
  return NULL;
}

/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, mark it as used in the bitmap passed via DATA.
   Return true when access to T prevents splitting the function.  */

static bool
mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  /* At present we can't pass non-SSA arguments to the split function.
     FIXME: this can be relaxed by passing references to arguments.  */
  if (TREE_CODE (t) == PARM_DECL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Cannot split: use of non-ssa function parameter.\n");
      return true;
    }

  if ((TREE_CODE (t) == VAR_DECL
       && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || TREE_CODE (t) == LABEL_DECL)
    bitmap_set_bit ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}

/* Compute local properties of basic block BB that we collect when looking
   for split points.  We look for SSA defs and store them in SET_SSA_NAMES,
   for SSA uses and store them in USED_SSA_NAMES, and for any non-SSA
   automatic vars, stored in NON_SSA_VARS.

   When BB has an edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put
   into the split function.  */

static bool
visit_bb (basic_block bb, basic_block return_bb,
	  bitmap set_ssa_names, bitmap used_ssa_names,
	  bitmap non_ssa_vars)
{
  gimple_stmt_iterator bsi;
  edge e;
  edge_iterator ei;
  bool can_split = true;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;
      tree decl;

      if (is_gimple_debug (stmt))
	continue;

      if (gimple_clobber_p (stmt))
	continue;

      /* FIXME: We can split regions containing EH.  We cannot, however,
	 split RESX, EH_DISPATCH and EH_POINTER referring to the same region
	 into different partitions.  This would require tracking of
	 EH regions and checking in consider_split if they
	 are not used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: resx.\n");
	  can_split = false;
	}
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: eh dispatch.\n");
	  can_split = false;
	}

      /* Check builtins that prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL
	  && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
	  && DECL_BUILT_IN (decl)
	  && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	switch (DECL_FUNCTION_CODE (decl))
	  {
	  /* FIXME: once we allow passing non-parm values to the split part,
	     we need to be sure to correctly handle builtin_stack_save and
	     builtin_stack_restore.  At the moment we are safe; there is no
	     way to store the builtin_stack_save result in a non-SSA variable
	     since all calls to those are compiler generated.  */
	  case BUILT_IN_APPLY:
	  case BUILT_IN_APPLY_ARGS:
	  case BUILT_IN_VA_START:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "Cannot split: builtin_apply and va_start.\n");
	    can_split = false;
	    break;
	  case BUILT_IN_EH_POINTER:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
	    can_split = false;
	    break;
	  default:
	    break;
	  }

      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      unsigned int i;

      if (is_gimple_debug (stmt))
	continue;
      if (!is_gimple_reg (gimple_phi_result (stmt)))
	continue;
      bitmap_set_bit (set_ssa_names,
		      SSA_NAME_VERSION (gimple_phi_result (stmt)));
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  tree op = gimple_phi_arg_def (stmt, i);
	  if (TREE_CODE (op) == SSA_NAME)
	    bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	}
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Also record uses coming from PHI operands in the return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
	for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	    if (is_gimple_debug (stmt))
	      continue;
	    if (!is_gimple_reg (gimple_phi_result (stmt)))
	      continue;
	    if (TREE_CODE (op) == SSA_NAME)
	      bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    else
	      can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
	  }
      }
  return can_split;
}

/* Stack entry for the recursive DFS walk in find_split_point.  */

typedef struct
{
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  bitmap non_ssa_vars;

  /* All BBs visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in DFS walk.  Since we walk an unoriented graph,
     the value can be up to the sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of the earliest BB reachable from the current BB
     or any BB visited later in the DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in the DFS walk.  */
  int overall_time, overall_size;

  /* When false we cannot split on this BB.  */
  bool can_split;
} stack_entry;
DEF_VEC_O(stack_entry);
DEF_VEC_ALLOC_O(stack_entry,heap);


/* Find all articulations and call consider_split on them.
   OVERALL_TIME and OVERALL_SIZE are the time and size of the function.

   We perform the basic algorithm for finding an articulation in a graph
   created from the CFG by considering it to be an unoriented graph.

   The articulation is discovered via a DFS walk.  We collect the earliest
   basic block on the stack that is reachable via a backward edge.  An
   articulation is any basic block such that there is no backward edge
   bypassing it.  To reduce stack usage we maintain a heap-allocated stack
   in the STACK vector.  The AUX pointer of a BB is set to the index at
   which it appears in the stack, or to -1 once it is visited and popped
   off the stack.

   The algorithm finds an articulation after visiting the whole component
   reachable from it.  This makes it convenient to collect information about
   the component used by consider_split.  */
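
/* As a small worked example (invented, not from any testcase), consider
   the diamond

       ENTRY -> A -> B -> D -> return
                 \         ^
                  \-> C ---/

   The walk discovers two articulations: D, whose DFS component is just
   {D}, and A, whose DFS component is {A, B, C, D}; each is handed to
   consider_split.  B and C are bypassed via the other arm of the diamond
   and therefore are never articulations.  */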

static void
find_split_points (int overall_time, int overall_size)
{
  stack_entry first;
  VEC(stack_entry, heap) *stack = NULL;
  basic_block bb;
  basic_block return_bb = find_return_bb ();
  struct split_point current;

  current.header_time = overall_time;
  current.header_size = overall_size;
  current.split_time = 0;
  current.split_size = 0;
  current.ssa_names_to_pass = BITMAP_ALLOC (NULL);

  first.bb = ENTRY_BLOCK_PTR;
  first.edge_num = 0;
  first.overall_time = 0;
  first.overall_size = 0;
  first.earliest = INT_MAX;
  first.set_ssa_names = 0;
  first.used_ssa_names = 0;
  first.bbs_visited = 0;
  VEC_safe_push (stack_entry, heap, stack, &first);
  ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;

  while (!VEC_empty (stack_entry, stack))
    {
      stack_entry *entry = VEC_last (stack_entry, stack);

      /* We are walking an acyclic graph, so edge_num counts
	 succ and pred edges together.  However, when considering
         articulation, we want to have processed everything reachable
	 from the articulation but nothing that reaches into it.  */
      if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
	  && entry->bb != ENTRY_BLOCK_PTR)
	{
	  int pos = VEC_length (stack_entry, stack);
	  entry->can_split &= visit_bb (entry->bb, return_bb,
					entry->set_ssa_names,
					entry->used_ssa_names,
					entry->non_ssa_vars);
	  if (pos <= entry->earliest && !entry->can_split
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "found articulation at bb %i but can not split\n",
		     entry->bb->index);
	  if (pos <= entry->earliest && entry->can_split)
	     {
	       if (dump_file && (dump_flags & TDF_DETAILS))
		 fprintf (dump_file, "found articulation at bb %i\n",
			  entry->bb->index);
	       current.entry_bb = entry->bb;
	       current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
	       bitmap_and_compl (current.ssa_names_to_pass,
				 entry->used_ssa_names, entry->set_ssa_names);
	       current.header_time = overall_time - entry->overall_time;
	       current.header_size = overall_size - entry->overall_size;
	       current.split_time = entry->overall_time;
	       current.split_size = entry->overall_size;
	       current.split_bbs = entry->bbs_visited;
	       consider_split (&current, entry->non_ssa_vars, return_bb);
	       BITMAP_FREE (current.ssa_names_to_pass);
	     }
	}
      /* Do the actual DFS walk.  */
      if (entry->edge_num
	  < (EDGE_COUNT (entry->bb->succs)
	     + EDGE_COUNT (entry->bb->preds)))
	{
          edge e;
	  basic_block dest;
	  if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
	    {
	      e = EDGE_SUCC (entry->bb, entry->edge_num);
	      dest = e->dest;
	    }
	  else
	    {
	      e = EDGE_PRED (entry->bb, entry->edge_num
			     - EDGE_COUNT (entry->bb->succs));
	      dest = e->src;
	    }

	  entry->edge_num++;

	  /* New BB to visit, push it to the stack.  */
	  if (dest != return_bb && dest != EXIT_BLOCK_PTR
	      && !dest->aux)
	    {
	      stack_entry new_entry;

	      new_entry.bb = dest;
	      new_entry.edge_num = 0;
	      new_entry.overall_time
		 = VEC_index (bb_info, bb_info_vec, dest->index)->time;
	      new_entry.overall_size
		 = VEC_index (bb_info, bb_info_vec, dest->index)->size;
	      new_entry.earliest = INT_MAX;
	      new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.bbs_visited = BITMAP_ALLOC (NULL);
	      new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
	      new_entry.can_split = true;
	      bitmap_set_bit (new_entry.bbs_visited, dest->index);
	      VEC_safe_push (stack_entry, heap, stack, &new_entry);
	      dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
	    }
	  /* Back edge found, record the earliest point.  */
	  else if ((intptr_t)dest->aux > 0
		   && (intptr_t)dest->aux < entry->earliest)
	    entry->earliest = (intptr_t)dest->aux;
	}
      /* We are done with examining the edges.  Pop the value off the stack
	 and merge stuff we accumulated during the walk.  */
      else if (entry->bb != ENTRY_BLOCK_PTR)
	{
	  stack_entry *prev = VEC_index (stack_entry, stack,
					 VEC_length (stack_entry, stack) - 2);

	  entry->bb->aux = (void *)(intptr_t)-1;
	  prev->can_split &= entry->can_split;
	  if (prev->set_ssa_names)
	    {
	      bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
	      bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
	      bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
	      bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
	    }
	  if (prev->earliest > entry->earliest)
	    prev->earliest = entry->earliest;
	  prev->overall_time += entry->overall_time;
	  prev->overall_size += entry->overall_size;
	  BITMAP_FREE (entry->set_ssa_names);
	  BITMAP_FREE (entry->used_ssa_names);
	  BITMAP_FREE (entry->bbs_visited);
	  BITMAP_FREE (entry->non_ssa_vars);
	  VEC_pop (stack_entry, stack);
	}
      else
        VEC_pop (stack_entry, stack);
    }
  ENTRY_BLOCK_PTR->aux = NULL;
  FOR_EACH_BB (bb)
    bb->aux = NULL;
  VEC_free (stack_entry, heap, stack);
  BITMAP_FREE (current.ssa_names_to_pass);
}

/* Split the function at SPLIT_POINT.  */

static void
split_function (struct split_point *split_point)
{
  VEC (tree, heap) *args_to_pass = NULL;
  bitmap args_to_skip;
  tree parm;
  int num = 0;
  struct cgraph_node *node, *cur_node = cgraph_get_node (current_function_decl);
  basic_block return_bb = find_return_bb ();
  basic_block call_bb;
  gimple_stmt_iterator gsi;
  gimple call;
  edge e;
  edge_iterator ei;
  tree retval = NULL, real_retval = NULL;
  bool split_part_return_p = false;
  gimple last_stmt = NULL;
  unsigned int i;
  tree arg;

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSplitting function at:\n");
      dump_split_point (dump_file, split_point);
    }

  if (cur_node->local.can_change_signature)
    args_to_skip = BITMAP_ALLOC (NULL);
  else
    args_to_skip = NULL;

  /* Collect the parameters of the new function and the args_to_skip
     bitmap.  */
  for (parm = DECL_ARGUMENTS (current_function_decl);
       parm; parm = DECL_CHAIN (parm), num++)
    if (args_to_skip
	&& (!is_gimple_reg (parm)
	    || !gimple_default_def (cfun, parm)
	    || !bitmap_bit_p (split_point->ssa_names_to_pass,
			      SSA_NAME_VERSION (gimple_default_def (cfun,
								    parm)))))
      bitmap_set_bit (args_to_skip, num);
    else
      {
	/* This parm might not have been used up to now, but is going to be
	   used, hence register it.  */
	add_referenced_var (parm);
	if (is_gimple_reg (parm))
	  {
	    arg = gimple_default_def (cfun, parm);
	    if (!arg)
	      {
		arg = make_ssa_name (parm, gimple_build_nop ());
		set_default_def (parm, arg);
	      }
	  }
	else
	  arg = parm;

	if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
	  arg = fold_convert (DECL_ARG_TYPE (parm), arg);
	VEC_safe_push (tree, heap, args_to_pass, arg);
      }

  /* See if the split function will return.  */
  FOR_EACH_EDGE (e, ei, return_bb->preds)
    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
      break;
  if (e)
    split_part_return_p = true;

  /* Add the return block to what will become the split function.  */
  if (!split_part_return_p)
    /* We do not return; no return block is needed.  */
    ;
  /* We have no return block, so nothing is needed.  */
  else if (return_bb == EXIT_BLOCK_PTR)
    ;
  /* When we do not want to return a value, we need to construct a
     new return block with an empty return statement.
     FIXME: Once we are able to change the return type, we should change the
     function to return void instead of just outputting a function with an
     undefined return value.  For structures this affects quality of
     codegen.  */
  else if (!split_point->split_part_set_retval
           && find_retval (return_bb))
    {
      bool redirected = true;
      basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
      gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
      gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
      while (redirected)
	{
	  redirected = false;
	  FOR_EACH_EDGE (e, ei, return_bb->preds)
	    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
	      {
		new_return_bb->count += e->count;
		new_return_bb->frequency += EDGE_FREQUENCY (e);
		redirect_edge_and_branch (e, new_return_bb);
		redirected = true;
		break;
	      }
	}
      e = make_edge (new_return_bb, EXIT_BLOCK_PTR, 0);
      e->probability = REG_BR_PROB_BASE;
      e->count = new_return_bb->count;
      bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
    }
  /* When we pass the value around, use the existing return block.  */
  else
    bitmap_set_bit (split_point->split_bbs, return_bb->index);

  /* If RETURN_BB has virtual operand PHIs, they must be removed and the
     virtual operand marked for renaming as we change the CFG in a way that
     tree-inline is not able to compensate for.

     Note this can happen whether or not we have a return value.  If we have
     a return value, then RETURN_BB may have PHIs for real operands too.  */
  if (return_bb != EXIT_BLOCK_PTR)
    {
      bool phi_p = false;
      for (gsi = gsi_start_phis (return_bb); !gsi_end_p (gsi);)
	{
	  gimple stmt = gsi_stmt (gsi);
	  if (is_gimple_reg (gimple_phi_result (stmt)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }
	  mark_virtual_phi_result_for_renaming (stmt);
	  remove_phi_node (&gsi, true);
	  phi_p = true;
	}
      /* In reality we have to rename the reaching definition of the
	 virtual operand at return_bb as we will eventually release it
	 when we remove the code region we outlined.
	 So we have to rename all immediate virtual uses of that region
	 if we didn't see a PHI definition yet.  */
      /* ???  In real reality we want to set the reaching vdef of the
         entry of the SESE region as the vuse of the call and the reaching
	 vdef of the exit of the SESE region as the vdef of the call.  */
      if (!phi_p)
	for (gsi = gsi_start_bb (return_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    gimple stmt = gsi_stmt (gsi);
	    if (gimple_vuse (stmt))
	      {
		gimple_set_vuse (stmt, NULL_TREE);
		update_stmt (stmt);
	      }
	    if (gimple_vdef (stmt))
	      break;
	  }
    }

  /* Now create the actual clone.  */
  rebuild_cgraph_edges ();
  node = cgraph_function_versioning (cur_node, NULL, NULL, args_to_skip,
				     !split_part_return_p,
				     split_point->split_bbs,
				     split_point->entry_bb, "part");
  /* For usual cloning it is enough to clear the builtin flag only when the
     signature changes.  For partial inlining, however, we cannot expect a
     part of the builtin implementation to have the same semantics as the
     whole.  */
  if (DECL_BUILT_IN (node->decl))
    {
      DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN;
      DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0;
    }
  cgraph_node_remove_callees (cur_node);
  if (!split_part_return_p)
    TREE_THIS_VOLATILE (node->decl) = 1;
  if (dump_file)
    dump_function_to_file (node->decl, dump_file, dump_flags);

  /* Create the basic block we place the call into.  It is the entry basic
     block split after the last label.  */
  call_bb = split_point->entry_bb;
  for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
    if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
      {
	last_stmt = gsi_stmt (gsi);
	gsi_next (&gsi);
      }
    else
      break;
  e = split_block (split_point->entry_bb, last_stmt);
  remove_edge (e);

  /* Produce the call statement.  */
  gsi = gsi_last_bb (call_bb);
  FOR_EACH_VEC_ELT (tree, args_to_pass, i, arg)
    if (!is_gimple_val (arg))
      {
	arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	VEC_replace (tree, args_to_pass, i, arg);
      }
  call = gimple_build_call_vec (node->decl, args_to_pass);
  gimple_set_block (call, DECL_INITIAL (current_function_decl));
  VEC_free (tree, heap, args_to_pass);

  /* We avoid the address being taken on any variable used by the split part,
     so return slot optimization is always possible.  Moreover this is
     required to make DECL_BY_REFERENCE work.  */
  if (aggregate_value_p (DECL_RESULT (current_function_decl),
			 TREE_TYPE (current_function_decl)))
    gimple_call_set_return_slot_opt (call, true);

  /* Update the return value.  This is a bit tricky.  When we do not return,
     do nothing.  When we return, we might need to update return_bb
     or produce a new return statement.  */
  if (!split_part_return_p)
    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
  else
    {
      e = make_edge (call_bb, return_bb,
		     return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
      e->count = call_bb->count;
      e->probability = REG_BR_PROB_BASE;

      /* If there is a return basic block, see what variable we need to
	 store the return value into and put the call just before it.  */
      if (return_bb != EXIT_BLOCK_PTR)
	{
	  real_retval = retval = find_retval (return_bb);

	  if (real_retval && split_point->split_part_set_retval)
	    {
	      gimple_stmt_iterator psi;

	      /* See if we need a new SSA_NAME for the result.
		 When DECL_BY_REFERENCE is true, retval is actually a pointer
		 to the return value and it is constant in the whole
		 function.  */
	      if (TREE_CODE (retval) == SSA_NAME
		  && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
		{
		  retval = make_ssa_name (SSA_NAME_VAR (retval), call);

		  /* See if there is a PHI defining the return value.  */
		  for (psi = gsi_start_phis (return_bb);
		       !gsi_end_p (psi); gsi_next (&psi))
		    if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))))
		      break;

		  /* When there is a PHI, just update its value.  */
		  if (TREE_CODE (retval) == SSA_NAME
		      && !gsi_end_p (psi))
		    add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
		  /* Otherwise update the return BB itself.
		     find_return_bb allows at most one assignment to the
		     return value, so update the first statement.  */
		  else
		    {
		      gimple_stmt_iterator bsi;
		      for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
			   gsi_next (&bsi))
			if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
			  {
			    gimple_return_set_retval (gsi_stmt (bsi), retval);
			    break;
			  }
			else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
				 && !gimple_clobber_p (gsi_stmt (bsi)))
			  {
			    gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
			    break;
			  }
		      update_stmt (gsi_stmt (bsi));
		    }
		}
	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
		{
		  gimple_call_set_lhs (call, build_simple_mem_ref (retval));
		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
		}
	      else
		{
		  tree restype;
		  restype = TREE_TYPE (DECL_RESULT (current_function_decl));
		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
		  if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
		    {
		      gimple cpy;
		      tree tem = create_tmp_reg (restype, NULL);
		      tem = make_ssa_name (tem, call);
		      cpy = gimple_build_assign_with_ops (NOP_EXPR, retval,
							  tem, NULL_TREE);
		      gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
		      retval = tem;
		    }
		  gimple_call_set_lhs (call, retval);
		  update_stmt (call);
		}
	    }
	  else
	    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	}
      /* We don't use the return block (there is either no return in the
	 function or multiple of them).  So create a new basic block with
	 a return statement.  */
      else
	{
	  gimple ret;
	  if (split_point->split_part_set_retval
	      && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
	    {
	      retval = DECL_RESULT (current_function_decl);

	      /* We use a temporary register to hold the value when
		 aggregate_value_p is false.  Similarly for DECL_BY_REFERENCE
		 we must avoid an extra copy.  */
	      if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
		  && !DECL_BY_REFERENCE (retval))
		retval = create_tmp_reg (TREE_TYPE (retval), NULL);
	      if (is_gimple_reg (retval))
		{
		  /* When returning by reference, there is only one SSA name
		     assigned to RESULT_DECL (that is the pointer to the return
		     value).  Look it up or create a new one if it is
		     missing.  */
		  if (DECL_BY_REFERENCE (retval))
		    {
		      tree retval_name;
		      if ((retval_name = gimple_default_def (cfun, retval))
			  != NULL)
			retval = retval_name;
		      else
			{
		          retval_name = make_ssa_name (retval,
						       gimple_build_nop ());
			  set_default_def (retval, retval_name);
			  retval = retval_name;
			}
		    }
		  /* Otherwise produce a new SSA name for the return value.  */
		  else
		    retval = make_ssa_name (retval, call);
		}
	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
	        gimple_call_set_lhs (call, build_simple_mem_ref (retval));
	      else
	        gimple_call_set_lhs (call, retval);
	    }
          gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	  ret = gimple_build_return (retval);
	  gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
	}
    }
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  compute_inline_parameters (node, true);
}


/* Execute function splitting pass.  */

static unsigned int
execute_split_functions (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb;
  int overall_time = 0, overall_size = 0;
  int todo = 0;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);

  if (flags_from_decl_or_type (current_function_decl)
      & (ECF_NORETURN|ECF_MALLOC))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
      return 0;
    }
  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: main function.\n");
      return 0;
    }
  /* This can be relaxed; the function might become inlinable after splitting
     away the uninlinable part.  */
  if (!inline_summary (node)->inlinable)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not inlinable.\n");
      return 0;
    }
  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
      return 0;
    }
  /* This can be relaxed; most of the versioning tests actually prevent
     a duplication.  */
  if (!tree_versionable_function_p (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not versionable.\n");
      return 0;
    }
  /* FIXME: we could support this.  */
  if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: nested function.\n");
      return 0;
    }

  /* See if it makes sense to try to split.
     It makes sense to split if we inline, that is if we have direct calls to
     handle or direct calls are possibly going to appear as a result of
     indirect inlining or LTO.  Also handle -fprofile-generate as LTO to
     allow non-LTO training for an LTO -fprofile-use build.

     Note that we are not completely conservative about disqualifying
     functions called once.  It is possible that the caller is called more
     than once and then inlining would still benefit.  */
  if ((!node->callers || !node->callers->next_caller)
      && !node->address_taken
      && (!flag_lto || !node->local.externally_visible))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not called directly "
		 "or called once.\n");
      return 0;
    }

  /* FIXME: We can actually split if splitting reduces call overhead.  */
  if (!flag_inline_small_functions
      && !DECL_DECLARED_INLINE_P (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not autoinlining and function"
		 " is not inline.\n");
      return 0;
    }

  /* Initialize bitmap to track forbidden calls.  */
  forbidden_dominators = BITMAP_ALLOC (NULL);
  calculate_dominance_info (CDI_DOMINATORS);

  /* Compute local info about basic blocks and determine function size/time.  */
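  /* Note that THIS_TIME below is scaled by the block's estimated relative
     frequency, so statements in rarely executed blocks contribute little
     to the overall time estimate, while every statement contributes fully
     to the overall size estimate.  */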
  VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
  memset (&best_split_point, 0, sizeof (best_split_point));
  FOR_EACH_BB (bb)
    {
      int time = 0;
      int size = 0;
      int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Basic block %i\n", bb->index);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  int this_time, this_size;
	  gimple stmt = gsi_stmt (bsi);

	  this_size = estimate_num_insns (stmt, &eni_size_weights);
	  this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
	  size += this_size;
	  time += this_time;
	  check_forbidden_calls (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  freq:%6i size:%3i time:%3i ",
		       freq, this_size, this_time);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      overall_time += time;
      overall_size += size;
      VEC_index (bb_info, bb_info_vec, bb->index)->time = time;
      VEC_index (bb_info, bb_info_vec, bb->index)->size = size;
    }
  find_split_points (overall_time, overall_size);
  if (best_split_point.split_bbs)
    {
      split_function (&best_split_point);
      BITMAP_FREE (best_split_point.ssa_names_to_pass);
      BITMAP_FREE (best_split_point.split_bbs);
      todo = TODO_update_ssa | TODO_cleanup_cfg;
    }
  BITMAP_FREE (forbidden_dominators);
  VEC_free (bb_info, heap, bb_info_vec);
  bb_info_vec = NULL;
  return todo;
}

/* Gate function splitting pass.  When doing profile feedback, we want
   to execute the pass after the profile has been read.  So this pass is
   disabled when profiling; pass_feedback_split_functions below runs
   instead.  */

static bool
gate_split_functions (void)
{
  return (flag_partial_inlining
	  && !profile_arc_flag && !flag_branch_probabilities);
}

struct gimple_opt_pass pass_split_functions =
{
 {
  GIMPLE_PASS,
  "fnsplit",				/* name */
  gate_split_functions,			/* gate */
  execute_split_functions,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_IPA_FNSPLIT,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_all      			/* todo_flags_finish */
 }
};

/* Gate feedback-driven function splitting pass.
   We don't need to split when profiling at all, since we are producing
   lousy code anyway.  */

static bool
gate_feedback_split_functions (void)
{
  return (flag_partial_inlining
	  && flag_branch_probabilities);
}

/* Execute feedback-driven function splitting pass.  */

static unsigned int
execute_feedback_split_functions (void)
{
  unsigned int retval = execute_split_functions ();
  if (retval)
    retval |= TODO_rebuild_cgraph_edges;
  return retval;
}

struct gimple_opt_pass pass_feedback_split_functions =
{
 {
  GIMPLE_PASS,
  "feedback_fnsplit",			/* name */
  gate_feedback_split_functions,	/* gate */
  execute_feedback_split_functions,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_IPA_FNSPLIT,			/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_all      			/* todo_flags_finish */
 }
};