1 /* Function splitting pass
2 Copyright (C) 2010-2014 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka <jh@suse.cz>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* The purpose of this pass is to split function bodies to improve
22 inlining. I.e. for function of the form:
23
24 func (...)
25 {
26 if (cheap_test)
27 something_small
28 else
29 something_big
30 }
31
32 Produce:
33
34 func.part (...)
35 {
36 something_big
37 }
38
39 func (...)
40 {
41 if (cheap_test)
42 something_small
43 else
44 func.part (...);
45 }
46
   When func becomes inlinable and when cheap_test is often true, inlining func,
   but not func.part, leads to a performance improvement similar to inlining the
   original func, while the code size growth is smaller.
50
51 The pass is organized in three stages:
52 1) Collect local info about basic block into BB_INFO structure and
53 compute function body estimated size and time.
   2) Via DFS walk find all possible basic blocks where we can split
      and choose the best one.
56 3) If split point is found, split at the specified BB by creating a clone
57 and updating function to call it.
58
59 The decisions what functions to split are in execute_split_functions
60 and consider_split.
61
62 There are several possible future improvements for this pass including:
63
64 1) Splitting to break up large functions
65 2) Splitting to reduce stack frame usage
   3) Allow split part of function to use values computed in the header part.
      The values need to be passed to the split function, perhaps via the same
      interface as for nested functions or as arguments.
   4) Support for simple rematerialization.  I.e. when the split part uses a
      value computed in the header from a function parameter in a very cheap
      way, we can just recompute it.
72 5) Support splitting of nested functions.
73 6) Support non-SSA arguments.
74 7) There is nothing preventing us from producing multiple parts of single function
75 when needed or splitting also the parts. */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tree.h"
81 #include "basic-block.h"
82 #include "tree-ssa-alias.h"
83 #include "internal-fn.h"
84 #include "gimple-expr.h"
85 #include "is-a.h"
86 #include "gimple.h"
87 #include "stringpool.h"
88 #include "expr.h"
89 #include "calls.h"
90 #include "gimplify.h"
91 #include "gimple-iterator.h"
92 #include "gimplify-me.h"
93 #include "gimple-walk.h"
94 #include "target.h"
95 #include "ipa-prop.h"
96 #include "gimple-ssa.h"
97 #include "tree-cfg.h"
98 #include "tree-phinodes.h"
99 #include "ssa-iterators.h"
100 #include "stringpool.h"
101 #include "tree-ssanames.h"
102 #include "tree-into-ssa.h"
103 #include "tree-dfa.h"
104 #include "tree-pass.h"
105 #include "flags.h"
106 #include "diagnostic.h"
107 #include "tree-dump.h"
108 #include "tree-inline.h"
109 #include "params.h"
110 #include "gimple-pretty-print.h"
111 #include "ipa-inline.h"
112 #include "cfgloop.h"
113
/* Per basic block info.  */

typedef struct
{
  /* Estimated size of the block, in the same units as the pass's
     overall_size accounting.  */
  unsigned int size;
  /* Estimated execution time of the block.  */
  unsigned int time;
} bb_info;

/* Per-BB info for the function currently being analyzed; presumably
   indexed by BB index — confirm at the fill/use sites.  */
static vec<bb_info> bb_info_vec;
123
/* Description of split point.  */

struct split_point
{
  /* Size of the partitions.  */
  unsigned int header_time, header_size, split_time, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become entry point of the new
     function).  */
  basic_block entry_bb;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;

  /* True when return value is computed on split part and thus it needs
     to be returned.  */
  bool split_part_set_retval;
};
144
/* Best split point found.  */

struct split_point best_split_point;

/* Set of basic blocks that are not allowed to dominate a split point.
   Filled by check_forbidden_calls, tested by dominated_by_forbidden.  */

static bitmap forbidden_dominators;

/* Forward declaration; defined below.  */
static tree find_retval (basic_block return_bb);
154
/* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
   variable, check whether it is present in bitmap passed via DATA.
   Return true when the variable (or the result decl it stands for) is
   marked in the bitmap, i.e. when its use here conflicts with the split.  */

static bool
test_nonssa_use (gimple, tree t, tree, void *data)
{
  t = get_base_address (t);

  /* SSA registers are handled separately; only memory-resident decls
     matter here.  */
  if (!t || is_gimple_reg (t))
    return false;

  if (TREE_CODE (t) == PARM_DECL
      || (TREE_CODE (t) == VAR_DECL
	  && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      /* Normal labels are part of CFG and will be handled gracefully.
	 Forced labels however can be used directly by statements and
	 need to stay in one partition along with their uses.  */
      || (TREE_CODE (t) == LABEL_DECL
	  && FORCED_LABEL (t)))
    return bitmap_bit_p ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && SSA_NAME_VAR (TREE_OPERAND (t, 0))
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}
190
191 /* Dump split point CURRENT. */
192
193 static void
dump_split_point(FILE * file,struct split_point * current)194 dump_split_point (FILE * file, struct split_point *current)
195 {
196 fprintf (file,
197 "Split point at BB %i\n"
198 " header time: %i header size: %i\n"
199 " split time: %i split size: %i\n bbs: ",
200 current->entry_bb->index, current->header_time,
201 current->header_size, current->split_time, current->split_size);
202 dump_bitmap (file, current->split_bbs);
203 fprintf (file, " SSA names to pass: ");
204 dump_bitmap (file, current->ssa_names_to_pass);
205 }
206
/* Look for all BBs in header that might lead to the split part and verify
   that they are not defining any non-SSA var used by the split part.
   Parameters are the same as for consider_split.
   Return true when the split is safe with respect to non-SSA vars.  */

static bool
verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars,
		     basic_block return_bb)
{
  bitmap seen = BITMAP_ALLOC (NULL);
  vec<basic_block> worklist = vNULL;
  edge e;
  edge_iterator ei;
  bool ok = true;
  basic_block bb;

  /* Seed the worklist with header-side predecessors of the split point
     (blocks not inside the split region).  */
  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	&& !bitmap_bit_p (current->split_bbs, e->src->index))
      {
        worklist.safe_push (e->src);
	bitmap_set_bit (seen, e->src->index);
      }

  /* Walk backwards through the header, checking each statement for a
     use of anything in NON_SSA_VARS.  */
  while (!worklist.is_empty ())
    {
      gimple_stmt_iterator bsi;

      bb = worklist.pop ();
      /* bitmap_set_bit returns true only on a 0->1 transition, so each
	 block is queued at most once.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (seen, e->src->index))
	  {
	    gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
					        e->src->index));
	    worklist.safe_push (e->src);
	  }
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  if (walk_stmt_load_store_addr_ops
	      (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	  /* A label defined in the header conflicts when the split part
	     uses it (test_nonssa_use only flags forced labels).  */
	  if (gimple_code (stmt) == GIMPLE_LABEL
	      && test_nonssa_use (stmt, gimple_label_label (stmt),
				  NULL_TREE, non_ssa_vars))
	    {
	      ok = false;
	      goto done;
	    }
	}
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  if (walk_stmt_load_store_addr_ops
	      (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	}
      /* PHI arguments flowing from this header block into RETURN_BB count
	 as header-side uses as well.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest != return_bb)
	    continue;
	  for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      gimple stmt = gsi_stmt (bsi);
	      tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	      if (virtual_operand_p (gimple_phi_result (stmt)))
		continue;
	      if (TREE_CODE (op) != SSA_NAME
		  && test_nonssa_use (stmt, op, op, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	}
    }

  /* Verify that the rest of function does not define any label
     used by the split part.  */
  FOR_EACH_BB_FN (bb, cfun)
    if (!bitmap_bit_p (current->split_bbs, bb->index)
	&& !bitmap_bit_p (seen, bb->index))
      {
	gimple_stmt_iterator bsi;
	/* Labels appear only at the start of a block, so stop at the
	   first non-label statement.  */
	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (gimple_code (gsi_stmt (bsi)) == GIMPLE_LABEL
	      && test_nonssa_use (gsi_stmt (bsi),
				  gimple_label_label (gsi_stmt (bsi)),
				  NULL_TREE, non_ssa_vars))
	    {
	      ok = false;
	      goto done;
	    }
	  else if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL)
	    break;
      }

 done:
  BITMAP_FREE (seen);
  worklist.release ();
  return ok;
}
320
/* If STMT is a call, check the callee against a list of forbidden
   predicate functions.  If a match is found, look for uses of the
   call result in condition statements that compare against zero.
   For each such use, find the block targeted by the condition
   statement for the nonzero result, and set the bit for this block
   in the forbidden dominators bitmap.  The purpose of this is to avoid
   selecting a split point where we are likely to lose the chance
   to optimize away an unused function call.  */

static void
check_forbidden_calls (gimple stmt)
{
  imm_use_iterator use_iter;
  use_operand_p use_p;
  tree lhs;

  /* At the moment, __builtin_constant_p is the only forbidden
     predicate function call (see PR49642).  */
  if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
    return;

  lhs = gimple_call_lhs (stmt);

  /* Without an SSA result there are no immediate uses to chase.  */
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
    {
      tree op1;
      basic_block use_bb, forbidden_bb;
      enum tree_code code;
      edge true_edge, false_edge;
      gimple use_stmt = USE_STMT (use_p);

      if (gimple_code (use_stmt) != GIMPLE_COND)
	continue;

      /* Assuming canonical form for GIMPLE_COND here, with constant
	 in second position.  */
      op1 = gimple_cond_rhs (use_stmt);
      code = gimple_cond_code (use_stmt);
      use_bb = gimple_bb (use_stmt);

      extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);

      /* We're only interested in comparisons that distinguish
	 unambiguously from zero.  */
      if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
	continue;

      /* The forbidden block is the one reached when the predicate result
	 is nonzero: the false edge for EQ, the true edge otherwise.  */
      if (code == EQ_EXPR)
	forbidden_bb = false_edge->dest;
      else
	forbidden_bb = true_edge->dest;

      bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
    }
}
379
380 /* If BB is dominated by any block in the forbidden dominators set,
381 return TRUE; else FALSE. */
382
383 static bool
dominated_by_forbidden(basic_block bb)384 dominated_by_forbidden (basic_block bb)
385 {
386 unsigned dom_bb;
387 bitmap_iterator bi;
388
389 EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
390 {
391 if (dominated_by_p (CDI_DOMINATORS, bb,
392 BASIC_BLOCK_FOR_FN (cfun, dom_bb)))
393 return true;
394 }
395
396 return false;
397 }
398
/* We found a split_point CURRENT.  NON_SSA_VARS is bitmap of all non ssa
   variables used and RETURN_BB is return basic block.
   See if we can split function here; when the candidate beats the best
   split point seen so far, record it in best_split_point.  */

static void
consider_split (struct split_point *current, bitmap non_ssa_vars,
		basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator bsi;
  unsigned int i;
  int incoming_freq = 0;
  tree retval;
  bool back_edge = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  /* Sum frequencies of header->split edges and note whether a DFS back
     edge enters the split point (i.e. the tail of the function is a
     loop).  */
  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    {
      if (e->flags & EDGE_DFS_BACK)
	back_edge = true;
      if (!bitmap_bit_p (current->split_bbs, e->src->index))
	incoming_freq += EDGE_FREQUENCY (e);
    }

  /* Do not split when we would end up calling function anyway.  */
  if (incoming_freq
      >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency
	  * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
    {
      /* When profile is guessed, we can not expect it to give us
	 realistic estimate on likelihood of function taking the
	 complex path.  As a special case, when tail of the function is
	 a loop, enable splitting since inlining code skipping the loop
	 is likely noticeable win.  */
      if (back_edge
	  && profile_status_for_fn (cfun) != PROFILE_READ
	  && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Split before loop, accepting despite low frequencies %i %i.\n",
		     incoming_freq,
		     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Refused: incoming frequency is too large.\n");
	  return;
	}
    }

  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Refused: header empty\n");
      return;
    }

  /* Verify that PHI args on entry are either virtual or all their operands
     incoming from header are the same.  */
  for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree val = NULL;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  edge e = gimple_phi_arg_edge (stmt, i);
	  if (!bitmap_bit_p (current->split_bbs, e->src->index))
	    {
	      tree edge_val = gimple_phi_arg_def (stmt, i);
	      if (val && edge_val != val)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: entry BB has PHI with multiple variants\n");
		  return;
		}
	      val = edge_val;
	    }
	}
    }


  /* See what argument we will pass to the split function and compute
     call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = DECL_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
	{
	  /* Memory-resident parameters can not be passed to the split
	     part; refuse when the split part needs one.  */
	  if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  Refused: need to pass non-ssa param values\n");
	      return;
	    }
	}
      else
	{
	  tree ddef = ssa_default_def (cfun, parm);
	  if (ddef
	      && bitmap_bit_p (current->ssa_names_to_pass,
			       SSA_NAME_VERSION (ddef)))
	    {
	      if (!VOID_TYPE_P (TREE_TYPE (parm)))
		call_overhead += estimate_move_cost (TREE_TYPE (parm));
	      num_args++;
	    }
	}
    }
  /* A non-void return value has to travel back from the split part.  */
  if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
    call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));

  /* Splitting pays off only when the split part outweighs the cost of
     calling it.  */
  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
			? MAX_INLINE_INSNS_SINGLE
			: MAX_INLINE_INSNS_AUTO))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* FIXME: we currently can pass only SSA function parameters to the split
     arguments.  Once parm_adjustment infrastructure is supported by cloning,
     we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-ssa vars used in the split region, see if they
     are used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars)
      && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split part has non-ssa uses\n");
      return;
    }

  /* If the split point is dominated by a forbidden block, reject
     the split.  */
  if (!bitmap_empty_p (forbidden_dominators)
      && dominated_by_forbidden (current->entry_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split point dominated by forbidden block\n");
      return;
    }

  /* See if retval used by return bb is computed by header or split part.
     When it is computed by split part, we need to produce return statement
     in the split part and add code to header to pass it around.

     This is a bit tricky to test:
       1) When there is no return_bb or no return value, we always pass
          value around.
       2) Invariants are always computed by caller.
       3) For SSA we need to look if defining statement is in header or split part
       4) For non-SSA we need to look where the var is computed.  */
  retval = find_retval (return_bb);
  if (!retval)
    current->split_part_set_retval = true;
  else if (is_gimple_min_invariant (retval))
    current->split_part_set_retval = false;
  /* Special case is value returned by reference we record as if it was non-ssa
     set to result_decl.  */
  else if (TREE_CODE (retval) == SSA_NAME
	   && SSA_NAME_VAR (retval)
	   && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
	   && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    current->split_part_set_retval
       = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
  else if (TREE_CODE (retval) == SSA_NAME)
    current->split_part_set_retval
      = (!SSA_NAME_IS_DEFAULT_DEF (retval)
	 && (bitmap_bit_p (current->split_bbs,
			  gimple_bb (SSA_NAME_DEF_STMT (retval))->index)
	     || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb));
  else if (TREE_CODE (retval) == PARM_DECL)
    current->split_part_set_retval = false;
  else if (TREE_CODE (retval) == VAR_DECL
	   || TREE_CODE (retval) == RESULT_DECL)
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
  else
    current->split_part_set_retval = true;

  /* split_function fixes up at most one non-virtual PHI node in return_bb,
     for the return value.  If there are other PHIs, give up.  */
  if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
    {
      gimple_stmt_iterator psi;

      for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
	if (!virtual_operand_p (gimple_phi_result (gsi_stmt (psi)))
	    && !(retval
		 && current->split_part_set_retval
		 && TREE_CODE (retval) == SSA_NAME
		 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
		 && SSA_NAME_DEF_STMT (retval) == gsi_stmt (psi)))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "  Refused: return bb has extra PHIs\n");
	    return;
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment choose split point with lowest frequency and that leaves
     out smallest size of header.
     In future we might re-consider this heuristics.  */
  if (!best_split_point.split_bbs
      || best_split_point.entry_bb->frequency > current->entry_bb->frequency
      || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
	  && best_split_point.split_size < current->split_size))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
	{
	  BITMAP_FREE (best_split_point.ssa_names_to_pass);
	  BITMAP_FREE (best_split_point.split_bbs);
	}
      /* CURRENT's bitmaps are owned by the caller, so deep-copy them
	 into freshly allocated ones.  */
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
		   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}
664
/* Return basic block containing RETURN statement.  We allow basic blocks
   of the form:
       <retval> = tmp_var;
       return <retval>
   but return_bb can not be more complex than this.
   If nothing is found, return the exit block.

   When there are multiple RETURN statements, choose one with return value,
   since that one is more likely shared by multiple code paths.

   Return BB is special, because for function splitting it is the only
   basic block that is duplicated in between header and split part of the
   function.

   TODO: We might support multiple return blocks.  */

static basic_block
find_return_bb (void)
{
  edge e;
  basic_block return_bb = EXIT_BLOCK_PTR_FOR_FN (cfun);
  gimple_stmt_iterator bsi;
  bool found_return = false;
  tree retval = NULL_TREE;

  /* Only a single exit predecessor can serve as the shared return BB.  */
  if (!single_pred_p (EXIT_BLOCK_PTR_FOR_FN (cfun)))
    return return_bb;

  /* Scan the candidate block backwards; anything other than labels,
     debug stmts, clobbers, the RETURN itself, and the single assignment
     feeding <retval> disqualifies it.  */
  e = single_pred_edge (EXIT_BLOCK_PTR_FOR_FN (cfun));
  for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      if (gimple_code (stmt) == GIMPLE_LABEL
	  || is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	;
      else if (gimple_code (stmt) == GIMPLE_ASSIGN
	       && found_return
	       && gimple_assign_single_p (stmt)
	       && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
				     current_function_decl)
		   || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
	       && retval == gimple_assign_lhs (stmt))
	;
      else if (gimple_code (stmt) == GIMPLE_RETURN)
	{
	  found_return = true;
	  retval = gimple_return_retval (stmt);
	}
      else
	break;
    }
  /* The block qualifies only when the backward scan consumed every
     statement and saw a RETURN.  */
  if (gsi_end_p (bsi) && found_return)
    return_bb = e->src;

  return return_bb;
}
722
723 /* Given return basic block RETURN_BB, see where return value is really
724 stored. */
725 static tree
find_retval(basic_block return_bb)726 find_retval (basic_block return_bb)
727 {
728 gimple_stmt_iterator bsi;
729 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
730 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
731 return gimple_return_retval (gsi_stmt (bsi));
732 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
733 && !gimple_clobber_p (gsi_stmt (bsi)))
734 return gimple_assign_rhs1 (gsi_stmt (bsi));
735 return NULL;
736 }
737
/* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
   variable, mark it as used in bitmap passed via DATA.
   Return true when access to T prevents splitting the function.  */

static bool
mark_nonssa_use (gimple, tree t, tree, void *data)
{
  t = get_base_address (t);

  /* SSA registers are tracked separately; only memory-resident decls
     matter here.  */
  if (!t || is_gimple_reg (t))
    return false;

  /* At present we can't pass non-SSA arguments to split function.
     FIXME: this can be relaxed by passing references to arguments.  */
  if (TREE_CODE (t) == PARM_DECL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Cannot split: use of non-ssa function parameter.\n");
      return true;
    }

  if ((TREE_CODE (t) == VAR_DECL
       && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || (TREE_CODE (t) == LABEL_DECL
	  && FORCED_LABEL (t)))
    bitmap_set_bit ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && SSA_NAME_VAR (TREE_OPERAND (t, 0))
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}
780
/* Compute local properties of basic block BB we collect when looking for
   split points.  We look for ssa defs and store them in SET_SSA_NAMES,
   for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
   vars stored in NON_SSA_VARS.

   When BB has edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put into
   split function.  */

static bool
visit_bb (basic_block bb, basic_block return_bb,
	  bitmap set_ssa_names, bitmap used_ssa_names,
	  bitmap non_ssa_vars)
{
  gimple_stmt_iterator bsi;
  edge e;
  edge_iterator ei;
  bool can_split = true;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;
      tree decl;

      /* Debug stmts and clobbers do not constrain splitting.  */
      if (is_gimple_debug (stmt))
	continue;

      if (gimple_clobber_p (stmt))
	continue;

      /* FIXME: We can split regions containing EH.  We can not however
	 split RESX, EH_DISPATCH and EH_POINTER referring to same region
	 into different partitions.  This would require tracking of
	 EH regions and checking in consider_split_point if they
	 are not used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: resx.\n");
	  can_split = false;
	}
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: eh dispatch.\n");
	  can_split = false;
	}

      /* Check builtins that prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL
	  && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
	  && DECL_BUILT_IN (decl)
	  && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	switch (DECL_FUNCTION_CODE (decl))
	  {
	  /* FIXME: once we will allow passing non-parm values to split part,
	     we need to be sure to handle correct builtin_stack_save and
	     builtin_stack_restore.  At the moment we are safe; there is no
	     way to store builtin_stack_save result in non-SSA variable
	     since all calls to those are compiler generated.  */
	  case BUILT_IN_APPLY:
	  case BUILT_IN_APPLY_ARGS:
	  case BUILT_IN_VA_START:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "Cannot split: builtin_apply and va_start.\n");
	    can_split = false;
	    break;
	  case BUILT_IN_EH_POINTER:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
	    can_split = false;
	    break;
	  default:
	    break;
	  }

      /* Record every SSA def and use of this statement.  */
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      unsigned int i;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      bitmap_set_bit (set_ssa_names,
		      SSA_NAME_VERSION (gimple_phi_result (stmt)));
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  tree op = gimple_phi_arg_def (stmt, i);
	  if (TREE_CODE (op) == SSA_NAME)
	    bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	}
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Record also uses coming from PHI operand in return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
	for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	    if (virtual_operand_p (gimple_phi_result (stmt)))
	      continue;
	    if (TREE_CODE (op) == SSA_NAME)
	      bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    else
	      can_split &= !mark_nonssa_use (stmt, op, op, non_ssa_vars);
	  }
      }
  return can_split;
}
909
/* Stack entry for recursive DFS walk in find_split_point.  */

typedef struct
{
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  /* Non-SSA automatic variables used in the same region.  */
  bitmap non_ssa_vars;

  /* All BBS visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in DFS walk.  Since we walk unoriented graph,
     the value is up to sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of earliest BB reachable from current BB
     or any BB visited later in DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in DFS walk.  */
  int overall_time, overall_size;

  /* When false we can not split on this BB.  */
  bool can_split;
} stack_entry;
939
940
941 /* Find all articulations and call consider_split on them.
942 OVERALL_TIME and OVERALL_SIZE is time and size of the function.
943
944 We perform basic algorithm for finding an articulation in a graph
945 created from CFG by considering it to be an unoriented graph.
946
947 The articulation is discovered via DFS walk. We collect earliest
948 basic block on stack that is reachable via backward edge. Articulation
949 is any basic block such that there is no backward edge bypassing it.
950 To reduce stack usage we maintain heap allocated stack in STACK vector.
951 AUX pointer of BB is set to index it appears in the stack or -1 once
952 it is visited and popped off the stack.
953
954 The algorithm finds articulation after visiting the whole component
955 reachable by it. This makes it convenient to collect information about
956 the component used by consider_split. */
957
958 static void
find_split_points(int overall_time,int overall_size)959 find_split_points (int overall_time, int overall_size)
960 {
961 stack_entry first;
962 vec<stack_entry> stack = vNULL;
963 basic_block bb;
964 basic_block return_bb = find_return_bb ();
965 struct split_point current;
966
967 current.header_time = overall_time;
968 current.header_size = overall_size;
969 current.split_time = 0;
970 current.split_size = 0;
971 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
972
973 first.bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
974 first.edge_num = 0;
975 first.overall_time = 0;
976 first.overall_size = 0;
977 first.earliest = INT_MAX;
978 first.set_ssa_names = 0;
979 first.used_ssa_names = 0;
980 first.non_ssa_vars = 0;
981 first.bbs_visited = 0;
982 first.can_split = false;
983 stack.safe_push (first);
984 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(intptr_t)-1;
985
986 while (!stack.is_empty ())
987 {
988 stack_entry *entry = &stack.last ();
989
990 /* We are walking an acyclic graph, so edge_num counts
991 succ and pred edges together. However when considering
992 articulation, we want to have processed everything reachable
993 from articulation but nothing that reaches into it. */
994 if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
995 && entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
996 {
997 int pos = stack.length ();
998 entry->can_split &= visit_bb (entry->bb, return_bb,
999 entry->set_ssa_names,
1000 entry->used_ssa_names,
1001 entry->non_ssa_vars);
1002 if (pos <= entry->earliest && !entry->can_split
1003 && dump_file && (dump_flags & TDF_DETAILS))
1004 fprintf (dump_file,
1005 "found articulation at bb %i but can not split\n",
1006 entry->bb->index);
1007 if (pos <= entry->earliest && entry->can_split)
1008 {
1009 if (dump_file && (dump_flags & TDF_DETAILS))
1010 fprintf (dump_file, "found articulation at bb %i\n",
1011 entry->bb->index);
1012 current.entry_bb = entry->bb;
1013 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
1014 bitmap_and_compl (current.ssa_names_to_pass,
1015 entry->used_ssa_names, entry->set_ssa_names);
1016 current.header_time = overall_time - entry->overall_time;
1017 current.header_size = overall_size - entry->overall_size;
1018 current.split_time = entry->overall_time;
1019 current.split_size = entry->overall_size;
1020 current.split_bbs = entry->bbs_visited;
1021 consider_split (¤t, entry->non_ssa_vars, return_bb);
1022 BITMAP_FREE (current.ssa_names_to_pass);
1023 }
1024 }
1025 /* Do actual DFS walk. */
1026 if (entry->edge_num
1027 < (EDGE_COUNT (entry->bb->succs)
1028 + EDGE_COUNT (entry->bb->preds)))
1029 {
1030 edge e;
1031 basic_block dest;
1032 if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
1033 {
1034 e = EDGE_SUCC (entry->bb, entry->edge_num);
1035 dest = e->dest;
1036 }
1037 else
1038 {
1039 e = EDGE_PRED (entry->bb, entry->edge_num
1040 - EDGE_COUNT (entry->bb->succs));
1041 dest = e->src;
1042 }
1043
1044 entry->edge_num++;
1045
1046 /* New BB to visit, push it to the stack. */
1047 if (dest != return_bb && dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1048 && !dest->aux)
1049 {
1050 stack_entry new_entry;
1051
1052 new_entry.bb = dest;
1053 new_entry.edge_num = 0;
1054 new_entry.overall_time
1055 = bb_info_vec[dest->index].time;
1056 new_entry.overall_size
1057 = bb_info_vec[dest->index].size;
1058 new_entry.earliest = INT_MAX;
1059 new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
1060 new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
1061 new_entry.bbs_visited = BITMAP_ALLOC (NULL);
1062 new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
1063 new_entry.can_split = true;
1064 bitmap_set_bit (new_entry.bbs_visited, dest->index);
1065 stack.safe_push (new_entry);
1066 dest->aux = (void *)(intptr_t)stack.length ();
1067 }
1068 /* Back edge found, record the earliest point. */
1069 else if ((intptr_t)dest->aux > 0
1070 && (intptr_t)dest->aux < entry->earliest)
1071 entry->earliest = (intptr_t)dest->aux;
1072 }
1073 /* We are done with examining the edges. Pop off the value from stack
1074 and merge stuff we accumulate during the walk. */
1075 else if (entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
1076 {
1077 stack_entry *prev = &stack[stack.length () - 2];
1078
1079 entry->bb->aux = (void *)(intptr_t)-1;
1080 prev->can_split &= entry->can_split;
1081 if (prev->set_ssa_names)
1082 {
1083 bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
1084 bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
1085 bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
1086 bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
1087 }
1088 if (prev->earliest > entry->earliest)
1089 prev->earliest = entry->earliest;
1090 prev->overall_time += entry->overall_time;
1091 prev->overall_size += entry->overall_size;
1092 BITMAP_FREE (entry->set_ssa_names);
1093 BITMAP_FREE (entry->used_ssa_names);
1094 BITMAP_FREE (entry->bbs_visited);
1095 BITMAP_FREE (entry->non_ssa_vars);
1096 stack.pop ();
1097 }
1098 else
1099 stack.pop ();
1100 }
1101 ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = NULL;
1102 FOR_EACH_BB_FN (bb, cfun)
1103 bb->aux = NULL;
1104 stack.release ();
1105 BITMAP_FREE (current.ssa_names_to_pass);
1106 }
1107
1108 /* Split function at SPLIT_POINT. */
1109
1110 static void
split_function(struct split_point * split_point)1111 split_function (struct split_point *split_point)
1112 {
1113 vec<tree> args_to_pass = vNULL;
1114 bitmap args_to_skip;
1115 tree parm;
1116 int num = 0;
1117 struct cgraph_node *node, *cur_node = cgraph_get_node (current_function_decl);
1118 basic_block return_bb = find_return_bb ();
1119 basic_block call_bb;
1120 gimple_stmt_iterator gsi;
1121 gimple call;
1122 edge e;
1123 edge_iterator ei;
1124 tree retval = NULL, real_retval = NULL;
1125 bool split_part_return_p = false;
1126 gimple last_stmt = NULL;
1127 unsigned int i;
1128 tree arg, ddef;
1129 vec<tree, va_gc> **debug_args = NULL;
1130
1131 if (dump_file)
1132 {
1133 fprintf (dump_file, "\n\nSplitting function at:\n");
1134 dump_split_point (dump_file, split_point);
1135 }
1136
1137 if (cur_node->local.can_change_signature)
1138 args_to_skip = BITMAP_ALLOC (NULL);
1139 else
1140 args_to_skip = NULL;
1141
1142 /* Collect the parameters of new function and args_to_skip bitmap. */
1143 for (parm = DECL_ARGUMENTS (current_function_decl);
1144 parm; parm = DECL_CHAIN (parm), num++)
1145 if (args_to_skip
1146 && (!is_gimple_reg (parm)
1147 || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
1148 || !bitmap_bit_p (split_point->ssa_names_to_pass,
1149 SSA_NAME_VERSION (ddef))))
1150 bitmap_set_bit (args_to_skip, num);
1151 else
1152 {
1153 /* This parm might not have been used up to now, but is going to be
1154 used, hence register it. */
1155 if (is_gimple_reg (parm))
1156 arg = get_or_create_ssa_default_def (cfun, parm);
1157 else
1158 arg = parm;
1159
1160 if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
1161 arg = fold_convert (DECL_ARG_TYPE (parm), arg);
1162 args_to_pass.safe_push (arg);
1163 }
1164
1165 /* See if the split function will return. */
1166 FOR_EACH_EDGE (e, ei, return_bb->preds)
1167 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1168 break;
1169 if (e)
1170 split_part_return_p = true;
1171
1172 /* Add return block to what will become the split function.
1173 We do not return; no return block is needed. */
1174 if (!split_part_return_p)
1175 ;
1176 /* We have no return block, so nothing is needed. */
1177 else if (return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1178 ;
1179 /* When we do not want to return value, we need to construct
1180 new return block with empty return statement.
1181 FIXME: Once we are able to change return type, we should change function
1182 to return void instead of just outputting function with undefined return
1183 value. For structures this affects quality of codegen. */
1184 else if (!split_point->split_part_set_retval
1185 && find_retval (return_bb))
1186 {
1187 bool redirected = true;
1188 basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
1189 gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
1190 gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
1191 while (redirected)
1192 {
1193 redirected = false;
1194 FOR_EACH_EDGE (e, ei, return_bb->preds)
1195 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1196 {
1197 new_return_bb->count += e->count;
1198 new_return_bb->frequency += EDGE_FREQUENCY (e);
1199 redirect_edge_and_branch (e, new_return_bb);
1200 redirected = true;
1201 break;
1202 }
1203 }
1204 e = make_edge (new_return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1205 e->probability = REG_BR_PROB_BASE;
1206 e->count = new_return_bb->count;
1207 if (current_loops)
1208 add_bb_to_loop (new_return_bb, current_loops->tree_root);
1209 bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
1210 }
1211 /* When we pass around the value, use existing return block. */
1212 else
1213 bitmap_set_bit (split_point->split_bbs, return_bb->index);
1214
1215 /* If RETURN_BB has virtual operand PHIs, they must be removed and the
1216 virtual operand marked for renaming as we change the CFG in a way that
1217 tree-inline is not able to compensate for.
1218
1219 Note this can happen whether or not we have a return value. If we have
1220 a return value, then RETURN_BB may have PHIs for real operands too. */
1221 if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1222 {
1223 bool phi_p = false;
1224 for (gsi = gsi_start_phis (return_bb); !gsi_end_p (gsi);)
1225 {
1226 gimple stmt = gsi_stmt (gsi);
1227 if (!virtual_operand_p (gimple_phi_result (stmt)))
1228 {
1229 gsi_next (&gsi);
1230 continue;
1231 }
1232 mark_virtual_phi_result_for_renaming (stmt);
1233 remove_phi_node (&gsi, true);
1234 phi_p = true;
1235 }
1236 /* In reality we have to rename the reaching definition of the
1237 virtual operand at return_bb as we will eventually release it
1238 when we remove the code region we outlined.
1239 So we have to rename all immediate virtual uses of that region
1240 if we didn't see a PHI definition yet. */
1241 /* ??? In real reality we want to set the reaching vdef of the
1242 entry of the SESE region as the vuse of the call and the reaching
1243 vdef of the exit of the SESE region as the vdef of the call. */
1244 if (!phi_p)
1245 for (gsi = gsi_start_bb (return_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1246 {
1247 gimple stmt = gsi_stmt (gsi);
1248 if (gimple_vuse (stmt))
1249 {
1250 gimple_set_vuse (stmt, NULL_TREE);
1251 update_stmt (stmt);
1252 }
1253 if (gimple_vdef (stmt))
1254 break;
1255 }
1256 }
1257
1258 /* Now create the actual clone. */
1259 rebuild_cgraph_edges ();
1260 node = cgraph_function_versioning (cur_node, vNULL,
1261 NULL,
1262 args_to_skip,
1263 !split_part_return_p,
1264 split_point->split_bbs,
1265 split_point->entry_bb, "part");
1266
1267 /* Let's take a time profile for splitted function. */
1268 node->tp_first_run = cur_node->tp_first_run + 1;
1269
1270 /* For usual cloning it is enough to clear builtin only when signature
1271 changes. For partial inlining we however can not expect the part
1272 of builtin implementation to have same semantic as the whole. */
1273 if (DECL_BUILT_IN (node->decl))
1274 {
1275 DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN;
1276 DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0;
1277 }
1278 /* If the original function is declared inline, there is no point in issuing
1279 a warning for the non-inlinable part. */
1280 DECL_NO_INLINE_WARNING_P (node->decl) = 1;
1281 cgraph_node_remove_callees (cur_node);
1282 ipa_remove_all_references (&cur_node->ref_list);
1283 if (!split_part_return_p)
1284 TREE_THIS_VOLATILE (node->decl) = 1;
1285 if (dump_file)
1286 dump_function_to_file (node->decl, dump_file, dump_flags);
1287
1288 /* Create the basic block we place call into. It is the entry basic block
1289 split after last label. */
1290 call_bb = split_point->entry_bb;
1291 for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
1292 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
1293 {
1294 last_stmt = gsi_stmt (gsi);
1295 gsi_next (&gsi);
1296 }
1297 else
1298 break;
1299 e = split_block (split_point->entry_bb, last_stmt);
1300 remove_edge (e);
1301
1302 /* Produce the call statement. */
1303 gsi = gsi_last_bb (call_bb);
1304 FOR_EACH_VEC_ELT (args_to_pass, i, arg)
1305 if (!is_gimple_val (arg))
1306 {
1307 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
1308 false, GSI_CONTINUE_LINKING);
1309 args_to_pass[i] = arg;
1310 }
1311 call = gimple_build_call_vec (node->decl, args_to_pass);
1312 gimple_set_block (call, DECL_INITIAL (current_function_decl));
1313 args_to_pass.release ();
1314
1315 /* For optimized away parameters, add on the caller side
1316 before the call
1317 DEBUG D#X => parm_Y(D)
1318 stmts and associate D#X with parm in decl_debug_args_lookup
1319 vector to say for debug info that if parameter parm had been passed,
1320 it would have value parm_Y(D). */
1321 if (args_to_skip)
1322 for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
1323 parm; parm = DECL_CHAIN (parm), num++)
1324 if (bitmap_bit_p (args_to_skip, num)
1325 && is_gimple_reg (parm))
1326 {
1327 tree ddecl;
1328 gimple def_temp;
1329
1330 /* This needs to be done even without MAY_HAVE_DEBUG_STMTS,
1331 otherwise if it didn't exist before, we'd end up with
1332 different SSA_NAME_VERSIONs between -g and -g0. */
1333 arg = get_or_create_ssa_default_def (cfun, parm);
1334 if (!MAY_HAVE_DEBUG_STMTS)
1335 continue;
1336
1337 if (debug_args == NULL)
1338 debug_args = decl_debug_args_insert (node->decl);
1339 ddecl = make_node (DEBUG_EXPR_DECL);
1340 DECL_ARTIFICIAL (ddecl) = 1;
1341 TREE_TYPE (ddecl) = TREE_TYPE (parm);
1342 DECL_MODE (ddecl) = DECL_MODE (parm);
1343 vec_safe_push (*debug_args, DECL_ORIGIN (parm));
1344 vec_safe_push (*debug_args, ddecl);
1345 def_temp = gimple_build_debug_bind (ddecl, unshare_expr (arg),
1346 call);
1347 gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
1348 }
1349 /* And on the callee side, add
1350 DEBUG D#Y s=> parm
1351 DEBUG var => D#Y
1352 stmts to the first bb where var is a VAR_DECL created for the
1353 optimized away parameter in DECL_INITIAL block. This hints
1354 in the debug info that var (whole DECL_ORIGIN is the parm PARM_DECL)
1355 is optimized away, but could be looked up at the call site
1356 as value of D#X there. */
1357 if (debug_args != NULL)
1358 {
1359 unsigned int i;
1360 tree var, vexpr;
1361 gimple_stmt_iterator cgsi;
1362 gimple def_temp;
1363
1364 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1365 var = BLOCK_VARS (DECL_INITIAL (node->decl));
1366 i = vec_safe_length (*debug_args);
1367 cgsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1368 do
1369 {
1370 i -= 2;
1371 while (var != NULL_TREE
1372 && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
1373 var = TREE_CHAIN (var);
1374 if (var == NULL_TREE)
1375 break;
1376 vexpr = make_node (DEBUG_EXPR_DECL);
1377 parm = (**debug_args)[i];
1378 DECL_ARTIFICIAL (vexpr) = 1;
1379 TREE_TYPE (vexpr) = TREE_TYPE (parm);
1380 DECL_MODE (vexpr) = DECL_MODE (parm);
1381 def_temp = gimple_build_debug_source_bind (vexpr, parm,
1382 NULL);
1383 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1384 def_temp = gimple_build_debug_bind (var, vexpr, NULL);
1385 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1386 }
1387 while (i);
1388 pop_cfun ();
1389 }
1390
1391 /* We avoid address being taken on any variable used by split part,
1392 so return slot optimization is always possible. Moreover this is
1393 required to make DECL_BY_REFERENCE work. */
1394 if (aggregate_value_p (DECL_RESULT (current_function_decl),
1395 TREE_TYPE (current_function_decl))
1396 && (!is_gimple_reg_type (TREE_TYPE (DECL_RESULT (current_function_decl)))
1397 || DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))))
1398 gimple_call_set_return_slot_opt (call, true);
1399
1400 /* Update return value. This is bit tricky. When we do not return,
1401 do nothing. When we return we might need to update return_bb
1402 or produce a new return statement. */
1403 if (!split_part_return_p)
1404 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1405 else
1406 {
1407 e = make_edge (call_bb, return_bb,
1408 return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
1409 ? 0 : EDGE_FALLTHRU);
1410 e->count = call_bb->count;
1411 e->probability = REG_BR_PROB_BASE;
1412
1413 /* If there is return basic block, see what value we need to store
1414 return value into and put call just before it. */
1415 if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1416 {
1417 real_retval = retval = find_retval (return_bb);
1418
1419 if (real_retval && split_point->split_part_set_retval)
1420 {
1421 gimple_stmt_iterator psi;
1422
1423 /* See if we need new SSA_NAME for the result.
1424 When DECL_BY_REFERENCE is true, retval is actually pointer to
1425 return value and it is constant in whole function. */
1426 if (TREE_CODE (retval) == SSA_NAME
1427 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1428 {
1429 retval = copy_ssa_name (retval, call);
1430
1431 /* See if there is PHI defining return value. */
1432 for (psi = gsi_start_phis (return_bb);
1433 !gsi_end_p (psi); gsi_next (&psi))
1434 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (psi))))
1435 break;
1436
1437 /* When there is PHI, just update its value. */
1438 if (TREE_CODE (retval) == SSA_NAME
1439 && !gsi_end_p (psi))
1440 add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
1441 /* Otherwise update the return BB itself.
1442 find_return_bb allows at most one assignment to return value,
1443 so update first statement. */
1444 else
1445 {
1446 gimple_stmt_iterator bsi;
1447 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
1448 gsi_next (&bsi))
1449 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
1450 {
1451 gimple_return_set_retval (gsi_stmt (bsi), retval);
1452 break;
1453 }
1454 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
1455 && !gimple_clobber_p (gsi_stmt (bsi)))
1456 {
1457 gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
1458 break;
1459 }
1460 update_stmt (gsi_stmt (bsi));
1461 }
1462 }
1463 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1464 {
1465 gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1466 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1467 }
1468 else
1469 {
1470 tree restype;
1471 restype = TREE_TYPE (DECL_RESULT (current_function_decl));
1472 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1473 if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
1474 {
1475 gimple cpy;
1476 tree tem = create_tmp_reg (restype, NULL);
1477 tem = make_ssa_name (tem, call);
1478 cpy = gimple_build_assign_with_ops (NOP_EXPR, retval,
1479 tem, NULL_TREE);
1480 gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
1481 retval = tem;
1482 }
1483 gimple_call_set_lhs (call, retval);
1484 update_stmt (call);
1485 }
1486 }
1487 else
1488 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1489 }
1490 /* We don't use return block (there is either no return in function or
1491 multiple of them). So create new basic block with return statement.
1492 */
1493 else
1494 {
1495 gimple ret;
1496 if (split_point->split_part_set_retval
1497 && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
1498 {
1499 retval = DECL_RESULT (current_function_decl);
1500
1501 /* We use temporary register to hold value when aggregate_value_p
1502 is false. Similarly for DECL_BY_REFERENCE we must avoid extra
1503 copy. */
1504 if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
1505 && !DECL_BY_REFERENCE (retval))
1506 retval = create_tmp_reg (TREE_TYPE (retval), NULL);
1507 if (is_gimple_reg (retval))
1508 {
1509 /* When returning by reference, there is only one SSA name
1510 assigned to RESULT_DECL (that is pointer to return value).
1511 Look it up or create new one if it is missing. */
1512 if (DECL_BY_REFERENCE (retval))
1513 retval = get_or_create_ssa_default_def (cfun, retval);
1514 /* Otherwise produce new SSA name for return value. */
1515 else
1516 retval = make_ssa_name (retval, call);
1517 }
1518 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1519 gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1520 else
1521 gimple_call_set_lhs (call, retval);
1522 }
1523 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1524 ret = gimple_build_return (retval);
1525 gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
1526 }
1527 }
1528 free_dominance_info (CDI_DOMINATORS);
1529 free_dominance_info (CDI_POST_DOMINATORS);
1530 compute_inline_parameters (node, true);
1531 }
1532
1533 /* Execute function splitting pass. */
1534
1535 static unsigned int
execute_split_functions(void)1536 execute_split_functions (void)
1537 {
1538 gimple_stmt_iterator bsi;
1539 basic_block bb;
1540 int overall_time = 0, overall_size = 0;
1541 int todo = 0;
1542 struct cgraph_node *node = cgraph_get_node (current_function_decl);
1543
1544 if (flags_from_decl_or_type (current_function_decl)
1545 & (ECF_NORETURN|ECF_MALLOC))
1546 {
1547 if (dump_file)
1548 fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
1549 return 0;
1550 }
1551 if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
1552 {
1553 if (dump_file)
1554 fprintf (dump_file, "Not splitting: main function.\n");
1555 return 0;
1556 }
1557 /* This can be relaxed; function might become inlinable after splitting
1558 away the uninlinable part. */
1559 if (inline_edge_summary_vec.exists ()
1560 && !inline_summary (node)->inlinable)
1561 {
1562 if (dump_file)
1563 fprintf (dump_file, "Not splitting: not inlinable.\n");
1564 return 0;
1565 }
1566 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1567 {
1568 if (dump_file)
1569 fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
1570 return 0;
1571 }
1572 /* This can be relaxed; most of versioning tests actually prevents
1573 a duplication. */
1574 if (!tree_versionable_function_p (current_function_decl))
1575 {
1576 if (dump_file)
1577 fprintf (dump_file, "Not splitting: not versionable.\n");
1578 return 0;
1579 }
1580 /* FIXME: we could support this. */
1581 if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
1582 {
1583 if (dump_file)
1584 fprintf (dump_file, "Not splitting: nested function.\n");
1585 return 0;
1586 }
1587
1588 /* See if it makes sense to try to split.
1589 It makes sense to split if we inline, that is if we have direct calls to
1590 handle or direct calls are possibly going to appear as result of indirect
1591 inlining or LTO. Also handle -fprofile-generate as LTO to allow non-LTO
1592 training for LTO -fprofile-use build.
1593
1594 Note that we are not completely conservative about disqualifying functions
1595 called once. It is possible that the caller is called more then once and
1596 then inlining would still benefit. */
1597 if ((!node->callers
1598 /* Local functions called once will be completely inlined most of time. */
1599 || (!node->callers->next_caller && node->local.local))
1600 && !node->address_taken
1601 && (!flag_lto || !node->externally_visible))
1602 {
1603 if (dump_file)
1604 fprintf (dump_file, "Not splitting: not called directly "
1605 "or called once.\n");
1606 return 0;
1607 }
1608
1609 /* FIXME: We can actually split if splitting reduces call overhead. */
1610 if (!flag_inline_small_functions
1611 && !DECL_DECLARED_INLINE_P (current_function_decl))
1612 {
1613 if (dump_file)
1614 fprintf (dump_file, "Not splitting: not autoinlining and function"
1615 " is not inline.\n");
1616 return 0;
1617 }
1618
1619 /* We enforce splitting after loop headers when profile info is not
1620 available. */
1621 if (profile_status_for_fn (cfun) != PROFILE_READ)
1622 mark_dfs_back_edges ();
1623
1624 /* Initialize bitmap to track forbidden calls. */
1625 forbidden_dominators = BITMAP_ALLOC (NULL);
1626 calculate_dominance_info (CDI_DOMINATORS);
1627
1628 /* Compute local info about basic blocks and determine function size/time. */
1629 bb_info_vec.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1);
1630 memset (&best_split_point, 0, sizeof (best_split_point));
1631 FOR_EACH_BB_FN (bb, cfun)
1632 {
1633 int time = 0;
1634 int size = 0;
1635 int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);
1636
1637 if (dump_file && (dump_flags & TDF_DETAILS))
1638 fprintf (dump_file, "Basic block %i\n", bb->index);
1639
1640 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1641 {
1642 int this_time, this_size;
1643 gimple stmt = gsi_stmt (bsi);
1644
1645 this_size = estimate_num_insns (stmt, &eni_size_weights);
1646 this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
1647 size += this_size;
1648 time += this_time;
1649 check_forbidden_calls (stmt);
1650
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1652 {
1653 fprintf (dump_file, " freq:%6i size:%3i time:%3i ",
1654 freq, this_size, this_time);
1655 print_gimple_stmt (dump_file, stmt, 0, 0);
1656 }
1657 }
1658 overall_time += time;
1659 overall_size += size;
1660 bb_info_vec[bb->index].time = time;
1661 bb_info_vec[bb->index].size = size;
1662 }
1663 find_split_points (overall_time, overall_size);
1664 if (best_split_point.split_bbs)
1665 {
1666 split_function (&best_split_point);
1667 BITMAP_FREE (best_split_point.ssa_names_to_pass);
1668 BITMAP_FREE (best_split_point.split_bbs);
1669 todo = TODO_update_ssa | TODO_cleanup_cfg;
1670 }
1671 BITMAP_FREE (forbidden_dominators);
1672 bb_info_vec.release ();
1673 return todo;
1674 }
1675
1676 /* Gate function splitting pass. When doing profile feedback, we want
1677 to execute the pass after profiling is read. So disable one in
1678 early optimization. */
1679
1680 static bool
gate_split_functions(void)1681 gate_split_functions (void)
1682 {
1683 return (flag_partial_inlining
1684 && !profile_arc_flag && !flag_branch_probabilities);
1685 }
1686
1687 namespace {
1688
1689 const pass_data pass_data_split_functions =
1690 {
1691 GIMPLE_PASS, /* type */
1692 "fnsplit", /* name */
1693 OPTGROUP_NONE, /* optinfo_flags */
1694 true, /* has_gate */
1695 true, /* has_execute */
1696 TV_IPA_FNSPLIT, /* tv_id */
1697 PROP_cfg, /* properties_required */
1698 0, /* properties_provided */
1699 0, /* properties_destroyed */
1700 0, /* todo_flags_start */
1701 TODO_verify_all, /* todo_flags_finish */
1702 };
1703
1704 class pass_split_functions : public gimple_opt_pass
1705 {
1706 public:
pass_split_functions(gcc::context * ctxt)1707 pass_split_functions (gcc::context *ctxt)
1708 : gimple_opt_pass (pass_data_split_functions, ctxt)
1709 {}
1710
1711 /* opt_pass methods: */
gate()1712 bool gate () { return gate_split_functions (); }
execute()1713 unsigned int execute () { return execute_split_functions (); }
1714
1715 }; // class pass_split_functions
1716
1717 } // anon namespace
1718
1719 gimple_opt_pass *
make_pass_split_functions(gcc::context * ctxt)1720 make_pass_split_functions (gcc::context *ctxt)
1721 {
1722 return new pass_split_functions (ctxt);
1723 }
1724
1725 /* Gate feedback driven function splitting pass.
1726 We don't need to split when profiling at all, we are producing
1727 lousy code anyway. */
1728
1729 static bool
gate_feedback_split_functions(void)1730 gate_feedback_split_functions (void)
1731 {
1732 return (flag_partial_inlining
1733 && flag_branch_probabilities);
1734 }
1735
1736 /* Execute function splitting pass. */
1737
1738 static unsigned int
execute_feedback_split_functions(void)1739 execute_feedback_split_functions (void)
1740 {
1741 unsigned int retval = execute_split_functions ();
1742 if (retval)
1743 retval |= TODO_rebuild_cgraph_edges;
1744 return retval;
1745 }
1746
1747 namespace {
1748
1749 const pass_data pass_data_feedback_split_functions =
1750 {
1751 GIMPLE_PASS, /* type */
1752 "feedback_fnsplit", /* name */
1753 OPTGROUP_NONE, /* optinfo_flags */
1754 true, /* has_gate */
1755 true, /* has_execute */
1756 TV_IPA_FNSPLIT, /* tv_id */
1757 PROP_cfg, /* properties_required */
1758 0, /* properties_provided */
1759 0, /* properties_destroyed */
1760 0, /* todo_flags_start */
1761 TODO_verify_all, /* todo_flags_finish */
1762 };
1763
1764 class pass_feedback_split_functions : public gimple_opt_pass
1765 {
1766 public:
pass_feedback_split_functions(gcc::context * ctxt)1767 pass_feedback_split_functions (gcc::context *ctxt)
1768 : gimple_opt_pass (pass_data_feedback_split_functions, ctxt)
1769 {}
1770
1771 /* opt_pass methods: */
gate()1772 bool gate () { return gate_feedback_split_functions (); }
execute()1773 unsigned int execute () { return execute_feedback_split_functions (); }
1774
1775 }; // class pass_feedback_split_functions
1776
1777 } // anon namespace
1778
1779 gimple_opt_pass *
make_pass_feedback_split_functions(gcc::context * ctxt)1780 make_pass_feedback_split_functions (gcc::context *ctxt)
1781 {
1782 return new pass_feedback_split_functions (ctxt);
1783 }
1784