1 /* Inlining decision heuristics.
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3    Contributed by Jan Hubicka
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /*  Inlining decision heuristics
22 
23     The implementation of the inliner is organized as follows:
24 
25     inlining heuristics limits
26 
27       can_inline_edge_p allows checking that a particular inlining is
28       permitted by the limits specified by the user (allowed function
29       growth, stack growth and so on).
30 
31       Functions are inlined when it is obvious the result is profitable (such
32       as functions called once or when inlining reduces code size).
33       In addition to that we perform inlining of small functions and recursive
34       inlining.
35 
36     inlining heuristics
37 
38        The inliner itself is split into two passes:
39 
40        pass_early_inlining
41 
42 	 A simple local inlining pass that inlines callees into the current
43 	 function.  This pass makes no use of whole unit analysis and thus can
44 	 make only very simple decisions based on local properties.
45 
46 	 The strength of the pass is that it is run in topological order
47 	 (reverse postorder) on the callgraph.  Functions are converted into SSA
48 	 form just before this pass and optimized subsequently.  As a result, the
49 	 callees of the function seen by the early inliner were already optimized
50 	 and the results of early inlining add a lot of optimization opportunities
51 	 for local optimization.
52 
53 	 The pass handles the obvious inlining decisions within the compilation
54 	 unit - inlining auto inline functions, inlining for size and
55 	 flattening.
56 
57 	 The main strength of the pass is its ability to eliminate abstraction
58 	 penalty in C++ code (via a combination of inlining and early
59 	 optimization) and thus improve the quality of analysis done by the real
60 	 IPA optimizers; see the illustration just after this comment.
61 
62 	 Because it lacks whole unit knowledge, the pass cannot really make
63 	 good code size/performance tradeoffs.  It does, however, perform very
64 	 simple speculative inlining, allowing code size to grow by
65 	 EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
66 	 the optimizations performed later are very likely to eliminate the cost.
67 
68        pass_ipa_inline
69 
70 	 This is the real inliner, able to handle inlining with whole program
71 	 knowledge.  It performs the following steps:
72 
73 	 1) Inlining of small functions.  This is implemented by a greedy
74 	 algorithm that orders all inlinable cgraph edges by their badness and
75 	 inlines them in this order as long as the inline limits allow doing so.
76 
77 	 This heuristic is not very good at inlining recursive calls.  Recursive
78 	 calls can be inlined with results similar to loop unrolling.  To handle
79 	 them, a special purpose recursive inliner is executed on a function when
80 	 a recursive edge is met as a viable candidate.
81 
82 	 2) Unreachable functions are removed from the callgraph.  Inlining leads
83 	 to devirtualization and other modifications of the callgraph, so
84 	 functions may become unreachable during the process.  Also functions
85 	 declared extern inline or virtual functions are removed, since after
86 	 inlining we no longer need the offline bodies.
87 
88 	 3) Functions called once and not exported from the unit are inlined.
89 	 This should almost always lead to a reduction of code size by
90 	 eliminating the need for an offline copy of the function.  */
91 
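/* Purely illustrative example (not taken from this file) of the abstraction
   penalty mentioned above: given

     struct vec { int *data; int &operator[] (int i) { return data[i]; } };
     int first (vec &v) { return v[0]; }

   early inlining of the trivial accessor followed by early optimizations
   reduces FIRST to a single memory load before the IPA passes run, which is
   what makes their later size and time estimates meaningful.  */
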
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "params.h"
109 #include "profile.h"
110 #include "symbol-summary.h"
111 #include "tree-vrp.h"
112 #include "ipa-prop.h"
113 #include "ipa-fnsummary.h"
114 #include "ipa-inline.h"
115 #include "ipa-utils.h"
116 #include "sreal.h"
117 #include "auto-profile.h"
118 #include "builtins.h"
119 #include "fibonacci_heap.h"
120 #include "stringpool.h"
121 #include "attribs.h"
122 #include "asan.h"
123 
124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
126 
127 /* Statistics we collect about the inlining algorithm.  */
128 static int overall_size;
129 static profile_count max_count;
130 static profile_count spec_rem;
131 
132 /* Return false when inlining edge E would lead to violating
133    limits on function unit growth or stack usage growth.
134 
135    The relative function body growth limit is present generally
136    to avoid problems with non-linear behavior of the compiler.
137    To allow inlining huge functions into a tiny wrapper, the limit
138    is always based on the bigger of the two functions considered.
139 
140    For stack growth limits we always base the growth on the stack usage
141    of the callers.  We want to prevent applications from segfaulting
142    on stack overflow when functions with huge stack frames get
143    inlined.  */
144 
145 static bool
146 caller_growth_limits (struct cgraph_edge *e)
147 {
148   struct cgraph_node *to = e->caller;
149   struct cgraph_node *what = e->callee->ultimate_alias_target ();
150   int newsize;
151   int limit = 0;
152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
153   ipa_fn_summary *info, *what_info;
154   ipa_fn_summary *outer_info = ipa_fn_summaries->get (to);
155 
156   /* Look for the function e->caller is inlined into.  While doing
157      so, work out the largest function body on the way.  As
158      described above, we want to base our function growth
159      limits on that, not on the self size of the
160      outer function, nor on the self size of the inline code
161      we immediately inline into.  This is the most relaxed
162      interpretation of the rule "do not grow large functions
163      too much in order to prevent the compiler from exploding".  */
164   while (true)
165     {
166       info = ipa_fn_summaries->get (to);
167       if (limit < info->self_size)
168 	limit = info->self_size;
169       if (stack_size_limit < info->estimated_self_stack_size)
170 	stack_size_limit = info->estimated_self_stack_size;
171       if (to->global.inlined_to)
172         to = to->callers->caller;
173       else
174 	break;
175     }
176 
177   what_info = ipa_fn_summaries->get (what);
178 
179   if (limit < what_info->self_size)
180     limit = what_info->self_size;
181 
182   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
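  /* Illustrative numbers only: with --param large-function-growth=100 and a
     largest relevant body of self_size 500, LIMIT becomes 1000; the size
     estimated after inlining must then exceed both this limit and
     --param large-function-insns before the growth limit below triggers.  */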
183 
184   /* Check the size after inlining against the function limits.  But allow
185      the function to shrink if it went over the limits by forced inlining.  */
186   newsize = estimate_size_after_inlining (to, e);
187   if (newsize >= info->size
188       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
189       && newsize > limit)
190     {
191       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
192       return false;
193     }
194 
195   if (!what_info->estimated_stack_size)
196     return true;
197 
198   /* FIXME: The stack size limit often prevents inlining in Fortran programs
199      due to large I/O data structures used by the Fortran front-end.
200      We ought to ignore this limit when we know that the edge is executed
201      on every invocation of the caller (i.e. its call statement dominates
202      exit block).  We do not track this information, yet.  */
203   stack_size_limit += ((gcov_type)stack_size_limit
204 		       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
205 
206   inlined_stack = (outer_info->stack_frame_offset
207 		   + outer_info->estimated_self_stack_size
208 		   + what_info->estimated_stack_size);
209   /* Check new stack consumption with stack consumption at the place
210      stack is used.  */
211   if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213 	 inline call, we can inline, too.
214 	 This bit overoptimistically assumes that we are good at stack
215 	 packing.  */
216       && inlined_stack > info->estimated_stack_size
217       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
218     {
219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220       return false;
221     }
222   return true;
223 }
224 
225 /* Dump info about why inlining has failed.  */
226 
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230   if (dump_enabled_p ())
231     {
232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 		       "  not inlinable: %C -> %C, %s\n",
234 		       e->caller, e->callee,
235 		       cgraph_inline_failed_string (e->inline_failed));
236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 	  && e->caller->lto_file_data
239 	  && e->callee->ultimate_alias_target ()->lto_file_data)
240 	{
241 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 			   "  LTO objects: %s, %s\n",
243 			   e->caller->lto_file_data->file_name,
244 			   e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 	}
246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 	if (dump_file)
248 	  cl_target_option_print_diff
249 	    (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 	     target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 	if (dump_file)
253 	  cl_optimization_print_diff
254 	    (dump_file, 2, opts_for_fn (e->caller->decl),
255 	     opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256     }
257 }
258 
259  /* Decide whether sanitizer-related attributes allow inlining. */
260 
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264   if (!caller || !callee)
265     return true;
266 
267   /* Allow inlining always_inline functions into no_sanitize_address
268      functions.  */
269   if (!sanitize_flags_p (SANITIZE_ADDRESS, caller)
270       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
271     return true;
272 
273   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
274 	   == sanitize_flags_p (SANITIZE_ADDRESS, callee))
275 	  && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
276 	      == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
277 	  && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
278 	      == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
279 }
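
/* For illustration only: with the checks above, a callee instrumented by
   -fsanitize=address is not inlined into a caller marked
   __attribute__((no_sanitize_address)) (nor the other way around); the sole
   exception is an always_inline callee flowing into a non-sanitized caller,
   which is accepted explicitly at the top of the function.  */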
280 
281 /* Used for flags where it is safe to inline when the caller's value is
282    greater than the callee's.  */
283 #define check_maybe_up(flag) \
284       (opts_for_fn (caller->decl)->x_##flag		\
285        != opts_for_fn (callee->decl)->x_##flag		\
286        && (!always_inline 				\
287 	   || opts_for_fn (caller->decl)->x_##flag	\
288 	      < opts_for_fn (callee->decl)->x_##flag))
289 /* Used for flags where it is safe to inline when the caller's value is
290    smaller than the callee's.  */
291 #define check_maybe_down(flag) \
292       (opts_for_fn (caller->decl)->x_##flag		\
293        != opts_for_fn (callee->decl)->x_##flag		\
294        && (!always_inline 				\
295 	   || opts_for_fn (caller->decl)->x_##flag	\
296 	      > opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where exact match is needed for correctness.  */
298 #define check_match(flag) \
299       (opts_for_fn (caller->decl)->x_##flag		\
300        != opts_for_fn (callee->decl)->x_##flag)
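
/* Illustration only: check_maybe_up (flag_trapping_math) expands to a test
   that is true (and thus blocks the inline) whenever the caller and callee
   disagree on -ftrapping-math, except that for always_inline callees a
   caller value larger than the callee's is tolerated, per the comment
   above.  */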
301 
302 /* Decide if we can inline the edge and possibly update
303    inline_failed reason.
304    We check whether inlining is possible at all; caller growth
305    limits are checked separately by can_inline_edge_by_limits_p.
306 
307    If REPORT is true, output the reason to the dump file.  */
308 
309 static bool
310 can_inline_edge_p (struct cgraph_edge *e, bool report,
311 		   bool early = false)
312 {
313   gcc_checking_assert (e->inline_failed);
314 
315   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
316     {
317       if (report)
318         report_inline_failed_reason (e);
319       return false;
320     }
321 
322   bool inlinable = true;
323   enum availability avail;
324   cgraph_node *caller = e->caller->global.inlined_to
325 		        ? e->caller->global.inlined_to : e->caller;
326   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
327 
328   if (!callee->definition)
329     {
330       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
331       inlinable = false;
332     }
333   if (!early && (!opt_for_fn (callee->decl, optimize)
334 		 || !opt_for_fn (caller->decl, optimize)))
335     {
336       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
337       inlinable = false;
338     }
339   else if (callee->calls_comdat_local)
340     {
341       e->inline_failed = CIF_USES_COMDAT_LOCAL;
342       inlinable = false;
343     }
344   else if (avail <= AVAIL_INTERPOSABLE)
345     {
346       e->inline_failed = CIF_OVERWRITABLE;
347       inlinable = false;
348     }
349   /* All edges with call_stmt_cannot_inline_p should have inline_failed
350      initialized to one of FINAL_ERROR reasons.  */
351   else if (e->call_stmt_cannot_inline_p)
352     gcc_unreachable ();
353   /* Don't inline if the functions have different EH personalities.  */
354   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
355 	   && DECL_FUNCTION_PERSONALITY (callee->decl)
356 	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
357 	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
358     {
359       e->inline_failed = CIF_EH_PERSONALITY;
360       inlinable = false;
361     }
362   /* TM pure functions should not be inlined into non-TM_pure
363      functions.  */
364   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
365     {
366       e->inline_failed = CIF_UNSPECIFIED;
367       inlinable = false;
368     }
369   /* Check compatibility of target optimization options.  */
370   else if (!targetm.target_option.can_inline_p (caller->decl,
371 						callee->decl))
372     {
373       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
374       inlinable = false;
375     }
376   else if (ipa_fn_summaries->get (callee) == NULL
377 	   || !ipa_fn_summaries->get (callee)->inlinable)
378     {
379       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
380       inlinable = false;
381     }
382   /* Don't inline a function with mismatched sanitization attributes. */
383   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
384     {
385       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
386       inlinable = false;
387     }
388   if (!inlinable && report)
389     report_inline_failed_reason (e);
390   return inlinable;
391 }
392 
393 /* Decide if we can inline the edge and possibly update
394    inline_failed reason.
395    We check whether inlining is possible at all and whether
396    caller growth limits allow doing so.
397 
398    If REPORT is true, output the reason to the dump file.
399 
400    If DISREGARD_LIMITS is true, ignore size limits.  */
401 
402 static bool
403 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
404 		             bool disregard_limits = false, bool early = false)
405 {
406   gcc_checking_assert (e->inline_failed);
407 
408   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
409     {
410       if (report)
411         report_inline_failed_reason (e);
412       return false;
413     }
414 
415   bool inlinable = true;
416   enum availability avail;
417   cgraph_node *caller = e->caller->global.inlined_to
418 		        ? e->caller->global.inlined_to : e->caller;
419   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
420   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
421   tree callee_tree
422     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
423   /* Check if caller growth allows the inlining.  */
424   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
425       && !disregard_limits
426       && !lookup_attribute ("flatten",
427      		 DECL_ATTRIBUTES (caller->decl))
428       && !caller_growth_limits (e))
429     inlinable = false;
430   else if (callee->externally_visible
431 	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
432 	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
433     {
434       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
435       inlinable = false;
436     }
437   /* Don't inline a function with a higher optimization level than the
438      caller.  FIXME: this is really just the tip of the iceberg of handling
439      the optimization attribute.  */
440   else if (caller_tree != callee_tree)
441     {
442       bool always_inline =
443 	     (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
444 	      && lookup_attribute ("always_inline",
445 				   DECL_ATTRIBUTES (callee->decl)));
446       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
447       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
448 
449      /* Until GCC 4.9 we did not check the semantics-altering flags
450 	below and inlined across optimization boundaries.
451 	Enabling the checks below breaks several packages by refusing
452 	to inline library always_inline functions.  See PR65873.
453 	Disable the check for early inlining for now until a better solution
454 	is found.  */
455      if (always_inline && early)
456 	;
457       /* There are some options that change IL semantics, which means
458          we cannot inline in these cases for correctness reasons,
459 	 not even for functions declared always_inline.  */
460      else if (check_match (flag_wrapv)
461 	      || check_match (flag_trapv)
462 	      || check_match (flag_pcc_struct_return)
463 	      /* When the caller or callee does FP math, be sure the FP codegen
464 		 flags are compatible.  */
465 	      || ((caller_info->fp_expressions && callee_info->fp_expressions)
466 		  && (check_maybe_up (flag_rounding_math)
467 		      || check_maybe_up (flag_trapping_math)
468 		      || check_maybe_down (flag_unsafe_math_optimizations)
469 		      || check_maybe_down (flag_finite_math_only)
470 		      || check_maybe_up (flag_signaling_nans)
471 		      || check_maybe_down (flag_cx_limited_range)
472 		      || check_maybe_up (flag_signed_zeros)
473 		      || check_maybe_down (flag_associative_math)
474 		      || check_maybe_down (flag_reciprocal_math)
475 		      || check_maybe_down (flag_fp_int_builtin_inexact)
476 		      /* Strictly speaking only when the callee contains function
477 			 calls that may end up setting errno.  */
478 		      || check_maybe_up (flag_errno_math)))
479 	      /* We do not want to make code compiled with exceptions to be
480 		 brought into a non-EH function unless we know that the callee
481 		 does not throw.
482 		 This is tracked by DECL_FUNCTION_PERSONALITY.  */
483 	      || (check_maybe_up (flag_non_call_exceptions)
484 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
485 	      || (check_maybe_up (flag_exceptions)
486 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
487 	      /* When devirtualization is disabled for the callee, it is not safe
488 		 to inline it as we may have mangled the type info.
489 		 Allow early inlining of always_inline functions.  */
490 	      || (!early && check_maybe_down (flag_devirtualize)))
491 	{
492 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
493 	  inlinable = false;
494 	}
495       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
496       else if (always_inline)
497 	;
498       /* When the user added an attribute to the callee, honor it.  */
499       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
500 	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
501 	{
502 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
503 	  inlinable = false;
504 	}
505       /* If explicit optimize attributes are not used, the mismatch is caused
506 	 by different command line options used to build different units.
507 	 Do not care about COMDAT functions - those are intended to be
508          optimized with the optimization flags of the module they are used in.
509 	 Also do not care about mixing up size/speed optimization when
510 	 DECL_DISREGARD_INLINE_LIMITS is set.  */
511       else if ((callee->merged_comdat
512 	        && !lookup_attribute ("optimize",
513 				      DECL_ATTRIBUTES (caller->decl)))
514 	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
515 	;
516       /* If the mismatch is caused by merging two LTO units with different
517 	 optimization flags we want to be a bit nicer.  However, never inline
518 	 if one of the functions is not optimized at all.  */
519       else if (!opt_for_fn (callee->decl, optimize)
520       	       || !opt_for_fn (caller->decl, optimize))
521 	{
522 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
523 	  inlinable = false;
524 	}
525       /* If the callee is optimized for size and the caller is not, allow
526 	 inlining if code shrinks or we are within the MAX_INLINE_INSNS_SINGLE
527 	 limit and the callee is inline (and thus likely a unified comdat).
528 	 This will allow the caller to run faster.  */
529       else if (opt_for_fn (callee->decl, optimize_size)
530 	       > opt_for_fn (caller->decl, optimize_size))
531 	{
532 	  int growth = estimate_edge_growth (e);
533 	  if (growth > PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE)
534 	      && (!DECL_DECLARED_INLINE_P (callee->decl)
535 		  && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
536 				    MAX_INLINE_INSNS_AUTO)))
537 	    {
538 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
539 	      inlinable = false;
540 	    }
541 	}
542       /* If the callee is more aggressively optimized for performance than the
543 	 caller, we generally want to inline only cheap (runtime-wise) functions.  */
544       else if (opt_for_fn (callee->decl, optimize_size)
545 	       < opt_for_fn (caller->decl, optimize_size)
546 	       || (opt_for_fn (callee->decl, optimize)
547 		   > opt_for_fn (caller->decl, optimize)))
548 	{
549 	  if (estimate_edge_time (e)
550 	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
551 	    {
552 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
553 	      inlinable = false;
554 	    }
555 	}
556 
557     }
558 
559   if (!inlinable && report)
560     report_inline_failed_reason (e);
561   return inlinable;
562 }
563 
564 
565 /* Return true if the edge E is inlinable during early inlining.  */
566 
567 static bool
568 can_early_inline_edge_p (struct cgraph_edge *e)
569 {
570   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
571   /* The early inliner might get called at WPA stage when an IPA pass adds a
572      new function.  In this case we cannot really do any early inlining
573      because function bodies are missing.  */
574   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
575     return false;
576   if (!gimple_has_body_p (callee->decl))
577     {
578       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
579       return false;
580     }
581   /* In the early inliner some of the callees may not be in SSA form yet
582      (i.e. the callgraph is cyclic and the callee has not been processed
583      by the early inliner yet).  We don't have a CIF code for this
584      case; later we will re-do the decision in the real inliner.  */
585   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
586       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
587     {
588       if (dump_enabled_p ())
589 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
590 			 "  edge not inlinable: not in SSA form\n");
591       return false;
592     }
593   if (!can_inline_edge_p (e, true, true)
594       || !can_inline_edge_by_limits_p (e, true, false, true))
595     return false;
596   return true;
597 }
598 
599 
600 /* Return number of calls in N.  Ignore cheap builtins.  */
601 
602 static int
603 num_calls (struct cgraph_node *n)
604 {
605   struct cgraph_edge *e;
606   int num = 0;
607 
608   for (e = n->callees; e; e = e->next_callee)
609     if (!is_inexpensive_builtin (e->callee->decl))
610       num++;
611   return num;
612 }
613 
614 
615 /* Return true if we are interested in inlining the small function early.  */
616 
617 static bool
618 want_early_inline_function_p (struct cgraph_edge *e)
619 {
620   bool want_inline = true;
621   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
622 
623   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
624     ;
625   /* For AutoFDO, we need to make sure that before the profile summary, all
626      hot paths' IR looks exactly the same as in the profiled binary.  As a
627      result, in the early inliner, we will disregard the size limit and
628      inline those callsites that are:
629        * inlined in the profiled binary, and
630        * the cloned callee has enough samples to be considered "hot".  */
631   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
632     ;
633   else if (!DECL_DECLARED_INLINE_P (callee->decl)
634 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
635     {
636       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
637       report_inline_failed_reason (e);
638       want_inline = false;
639     }
640   else
641     {
642       int growth = estimate_edge_growth (e);
643       int n;
644 
645       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
646 	;
647       else if (!e->maybe_hot_p ())
648 	{
649 	  if (dump_enabled_p ())
650 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
651 			     "  will not early inline: %C->%C, "
652 			     "call is cold and code would grow by %i\n",
653 			     e->caller, callee,
654 			     growth);
655 	  want_inline = false;
656 	}
657       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
658 	{
659 	  if (dump_enabled_p ())
660 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
661 			     "  will not early inline: %C->%C, "
662 			     "growth %i exceeds --param early-inlining-insns\n",
663 			     e->caller, callee,
664 			     growth);
665 	  want_inline = false;
666 	}
667       else if ((n = num_calls (callee)) != 0
668 	       && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
669 	{
670 	  if (dump_enabled_p ())
671 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
672 			     "  will not early inline: %C->%C, "
673 			     "growth %i exceeds --param early-inlining-insns "
674 			     "divided by number of calls\n",
675 			     e->caller, callee,
676 			     growth);
677 	  want_inline = false;
678 	}
679     }
680   return want_inline;
681 }
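
/* Worked example (illustrative numbers): if inlining the callee grows the
   caller by 4 instructions and the callee itself makes 3 non-trivial calls,
   the last test above compares 4 * (3 + 1) = 16 against
   --param early-inlining-insns, so with a value of 14 the edge is rejected
   even though the plain growth check alone would accept it.  */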
682 
683 /* Compute time of the edge->caller + edge->callee execution when inlining
684    does not happen.  */
685 
686 inline sreal
687 compute_uninlined_call_time (struct cgraph_edge *edge,
688 			     sreal uninlined_call_time)
689 {
690   cgraph_node *caller = (edge->caller->global.inlined_to
691 			 ? edge->caller->global.inlined_to
692 			 : edge->caller);
693 
694   sreal freq = edge->sreal_frequency ();
695   if (freq > 0)
696     uninlined_call_time *= freq;
697   else
698     uninlined_call_time = uninlined_call_time >> 11;
699 
700   sreal caller_time = ipa_fn_summaries->get (caller)->time;
701   return uninlined_call_time + caller_time;
702 }
703 
704 /* Same as compute_uninlined_call_time but compute the time when inlining
705    does happen.  */
706 
707 inline sreal
708 compute_inlined_call_time (struct cgraph_edge *edge,
709 			   sreal time)
710 {
711   cgraph_node *caller = (edge->caller->global.inlined_to
712 			 ? edge->caller->global.inlined_to
713 			 : edge->caller);
714   sreal caller_time = ipa_fn_summaries->get (caller)->time;
715 
716   sreal freq = edge->sreal_frequency ();
717   if (freq > 0)
718     time *= freq;
719   else
720     time = time >> 11;
721 
722   /* This calculation should match one in ipa-inline-analysis.c
723      (estimate_edge_size_and_time).  */
724   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
725   time += caller_time;
726   if (time <= 0)
727     time = ((sreal) 1) >> 8;
728   gcc_checking_assert (time >= 0);
729   return time;
730 }
731 
732 /* Return true if the speedup for inlining E is bigger than
733    PARAM_INLINE_MIN_SPEEDUP.  */
734 
735 static bool
736 big_speedup_p (struct cgraph_edge *e)
737 {
738   sreal unspec_time;
739   sreal spec_time = estimate_edge_time (e, &unspec_time);
740   sreal time = compute_uninlined_call_time (e, unspec_time);
741   sreal inlined_time = compute_inlined_call_time (e, spec_time);
742 
743   if ((time - inlined_time) * 100
744       > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
745     return true;
746   return false;
747 }
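
/* Illustrative numbers only: with --param inline-min-speedup=15, an edge
   whose estimated caller time is 200 without inlining and 160 with inlining
   saves (200 - 160) * 100 = 4000 > 200 * 15 = 3000, so it counts as a big
   speedup; a saving of only 20 time units would not.  */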
748 
749 /* Return true if we are interested in inlining the small function.
750    When REPORT is true, report the reason to the dump file.  */
751 
752 static bool
753 want_inline_small_function_p (struct cgraph_edge *e, bool report)
754 {
755   bool want_inline = true;
756   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
757 
758   /* Allow this function to be called before can_inline_edge_p,
759      since it's usually cheaper.  */
760   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
761     want_inline = false;
762   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
763     ;
764   else if (!DECL_DECLARED_INLINE_P (callee->decl)
765 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
766     {
767       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
768       want_inline = false;
769     }
770   /* Do a fast and conservative check whether the function can be a good
771      inline candidate.  At the moment we allow inline hints to
772      promote non-inline functions to inline and we increase
773      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
774   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
775 	   && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
776 	   && ipa_fn_summaries->get (callee)->min_size
777 		- ipa_call_summaries->get (e)->call_stmt_size
778 	      > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
779     {
780       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
781       want_inline = false;
782     }
783   else if ((DECL_DECLARED_INLINE_P (callee->decl)
784 	    || e->count.ipa ().nonzero_p ())
785 	   && ipa_fn_summaries->get (callee)->min_size
786 		- ipa_call_summaries->get (e)->call_stmt_size
787 	      > 16 * MAX_INLINE_INSNS_SINGLE)
788     {
789       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
790 			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
791 			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
792       want_inline = false;
793     }
794   else
795     {
796       int growth = estimate_edge_growth (e);
797       ipa_hints hints = estimate_edge_hints (e);
798       int big_speedup = -1; /* compute this lazily */
799 
800       if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE))
801 	;
802       /* Apply the MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
803 	 hints suggest that inlining the given function is very profitable.  */
804       else if (DECL_DECLARED_INLINE_P (callee->decl)
805 	       && growth >= MAX_INLINE_INSNS_SINGLE
806 	       && (growth >= MAX_INLINE_INSNS_SINGLE * 16
807 		   || (!(hints & (INLINE_HINT_indirect_call
808 				  | INLINE_HINT_known_hot
809 				  | INLINE_HINT_loop_iterations
810 				  | INLINE_HINT_array_index
811 				  | INLINE_HINT_loop_stride))
812 		       && !(big_speedup = big_speedup_p (e)))))
813 	{
814           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
815 	  want_inline = false;
816 	}
817       else if (!DECL_DECLARED_INLINE_P (callee->decl)
818 	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
819 	       && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL))
820 	{
821 	  /* growth_likely_positive is expensive, always test it last.  */
822           if (growth >= MAX_INLINE_INSNS_SINGLE
823 	      || growth_likely_positive (callee, growth))
824 	    {
825               e->inline_failed = CIF_NOT_DECLARED_INLINED;
826 	      want_inline = false;
827  	    }
828 	}
829       /* Apply the MAX_INLINE_INSNS_AUTO limit for functions not declared
830 	 inline.  Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
831 	 inlining the given function is very profitable.  */
832       else if (!DECL_DECLARED_INLINE_P (callee->decl)
833 	       && !(hints & INLINE_HINT_known_hot)
834 	       && growth >= ((hints & (INLINE_HINT_indirect_call
835 				       | INLINE_HINT_loop_iterations
836 			               | INLINE_HINT_array_index
837 				       | INLINE_HINT_loop_stride))
838 			     ? MAX (MAX_INLINE_INSNS_AUTO,
839 				    MAX_INLINE_INSNS_SINGLE)
840 			     : MAX_INLINE_INSNS_AUTO)
841 	       && !(big_speedup == -1 ? big_speedup_p (e) : big_speedup))
842 	{
843 	  /* growth_likely_positive is expensive, always test it last.  */
844           if (growth >= MAX_INLINE_INSNS_SINGLE
845 	      || growth_likely_positive (callee, growth))
846 	    {
847 	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
848 	      want_inline = false;
849  	    }
850 	}
851       /* If the call is cold, do not inline when the function body would grow.  */
852       else if (!e->maybe_hot_p ()
853 	       && (growth >= MAX_INLINE_INSNS_SINGLE
854 		   || growth_likely_positive (callee, growth)))
855 	{
856           e->inline_failed = CIF_UNLIKELY_CALL;
857 	  want_inline = false;
858 	}
859     }
860   if (!want_inline && report)
861     report_inline_failed_reason (e);
862   return want_inline;
863 }
864 
865 /* EDGE is a self-recursive edge.
866    We handle two cases - when function A is inlined into itself
867    or when function A is being inlined into another inliner copy of function
868    A within function B.
869 
870    In the first case OUTER_NODE points to the toplevel copy of A, while
871    in the second case OUTER_NODE points to the outermost copy of A in B.
872 
873    In both cases we want to be extra selective since
874    inlining the call will just cause new recursive calls to appear.  */
875 
876 static bool
877 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
878 				   struct cgraph_node *outer_node,
879 				   bool peeling,
880 				   int depth)
881 {
882   char const *reason = NULL;
883   bool want_inline = true;
884   sreal caller_freq = 1;
885   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
886 
887   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
888     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
889 
890   if (!edge->maybe_hot_p ())
891     {
892       reason = "recursive call is cold";
893       want_inline = false;
894     }
895   else if (depth > max_depth)
896     {
897       reason = "--param max-inline-recursive-depth exceeded.";
898       want_inline = false;
899     }
900   else if (outer_node->global.inlined_to
901 	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
902     {
903       reason = "caller frequency is 0";
904       want_inline = false;
905     }
906 
907   if (!want_inline)
908     ;
909   /* Inlining of a self recursive function into a copy of itself within
910      another function is a transformation similar to loop peeling.
911 
912      Peeling is profitable if we can inline enough copies to make the
913      probability of an actual call to the self recursive function very small.
914      Be sure that the probability of recursion is small.
915 
916      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
917      This way the expected number of recursions is at most max_depth.  */
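  /* Illustrative numbers only: with max_depth 8 the threshold is
     1 - 1/8 = 0.875; a call that recurses with probability at most 0.875
     recurses 1 / (1 - 0.875) = 8 times in expectation, matching the depth
     limit above.  */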
918   else if (peeling)
919     {
920       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
921       int i;
922       for (i = 1; i < depth; i++)
923 	max_prob = max_prob * max_prob;
924       if (edge->sreal_frequency () >= max_prob * caller_freq)
925 	{
926 	  reason = "frequency of recursive call is too large";
927 	  want_inline = false;
928 	}
929     }
930   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
931      the recursion depth is large.  We reduce function call overhead and
932      increase the chances that things fit in the hardware return predictor.
933 
934      Recursive inlining might however increase the cost of stack frame setup,
935      actually slowing down functions whose recursion tree is wide rather than
936      deep.
937 
938      Deciding reliably when to do recursive inlining without profile feedback
939      is tricky.  For now we disable recursive inlining when the probability of
940      self recursion is low.
941 
942      Recursive inlining of a self recursive call within a loop also results in
943      large loop depths that generally optimize badly.  We may want to throttle
944      down inlining in those cases.  In particular this seems to happen in one
945      of the libstdc++ rb tree methods.  */
946   else
947     {
948       if (edge->sreal_frequency () * 100
949           <= caller_freq
950 	     * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
951 	{
952 	  reason = "frequency of recursive call is too small";
953 	  want_inline = false;
954 	}
955     }
956   if (!want_inline && dump_enabled_p ())
957     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
958 		     "   not inlining recursively: %s\n", reason);
959   return want_inline;
960 }
961 
962 /* Return true when NODE has an uninlinable caller;
963    set HAS_HOT_CALL if it has a hot call.
964    Worker for cgraph_for_node_and_aliases.  */
965 
966 static bool
967 check_callers (struct cgraph_node *node, void *has_hot_call)
968 {
969   struct cgraph_edge *e;
970    for (e = node->callers; e; e = e->next_caller)
971      {
972        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
973 	   || !opt_for_fn (e->caller->decl, optimize))
974 	 return true;
975        if (!can_inline_edge_p (e, true))
976          return true;
977        if (e->recursive_p ())
978 	 return true;
979        if (!can_inline_edge_by_limits_p (e, true))
980          return true;
981        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
982 	 *(bool *)has_hot_call = true;
983      }
984   return false;
985 }
986 
987 /* If NODE has a caller, return true.  */
988 
989 static bool
990 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
991 {
992   if (node->callers)
993     return true;
994   return false;
995 }
996 
997 /* Decide if inlining NODE would reduce unit size by eliminating
998    the offline copy of the function.
999    When COLD is true, cold calls are considered, too.  */
1000 
1001 static bool
1002 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1003 {
1004   bool has_hot_call = false;
1005 
1006   /* Aliases get inlined along with the function they alias.  */
1007   if (node->alias)
1008     return false;
1009   /* Already inlined?  */
1010   if (node->global.inlined_to)
1011     return false;
1012   /* Does it have callers?  */
1013   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1014     return false;
1015   /* Inlining into all callers would increase size?  */
1016   if (estimate_growth (node) > 0)
1017     return false;
1018   /* All inlines must be possible.  */
1019   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1020 					 true))
1021     return false;
1022   if (!cold && !has_hot_call)
1023     return false;
1024   return true;
1025 }
1026 
1027 /* A cost model driving the inlining heuristics in a way such that the edges
1028    with the smallest badness are inlined first.  After each inlining is
1029    performed the costs of all caller edges of the nodes affected are
1030    recomputed so that the metrics may accurately depend on values such as the
1031    number of inlinable callers of the function or the function body size.  */
1032 
1033 static sreal
1034 edge_badness (struct cgraph_edge *edge, bool dump)
1035 {
1036   sreal badness;
1037   int growth;
1038   sreal edge_time, unspec_edge_time;
1039   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1040   struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1041   ipa_hints hints;
1042   cgraph_node *caller = (edge->caller->global.inlined_to
1043 			 ? edge->caller->global.inlined_to
1044 			 : edge->caller);
1045 
1046   growth = estimate_edge_growth (edge);
1047   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1048   hints = estimate_edge_hints (edge);
1049   gcc_checking_assert (edge_time >= 0);
1050   /* Check that the inlined time is better, but tolerate some roundoff issues.
1051      FIXME: When the callee profile drops to 0 we account for calls more.  This
1052      should be fixed by never doing that.  */
1053   gcc_checking_assert ((edge_time * 100
1054 			- callee_info->time * 101).to_int () <= 0
1055 			|| callee->count.ipa ().initialized_p ());
1056   gcc_checking_assert (growth <= callee_info->size);
1057 
1058   if (dump)
1059     {
1060       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1061 	       edge->caller->dump_name (),
1062 	       edge->callee->dump_name ());
1063       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1064 	       growth,
1065 	       edge_time.to_double (),
1066 	       unspec_edge_time.to_double ());
1067       ipa_dump_hints (dump_file, hints);
1068       if (big_speedup_p (edge))
1069 	fprintf (dump_file, " big_speedup");
1070       fprintf (dump_file, "\n");
1071     }
1072 
1073   /* Always prefer inlining saving code size.  */
1074   if (growth <= 0)
1075     {
1076       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1077       if (dump)
1078 	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1079 		 growth);
1080     }
1081    /* Inlining into EXTERNAL functions is not going to change anything unless
1082       they are themselves inlined.  */
1083    else if (DECL_EXTERNAL (caller->decl))
1084     {
1085       if (dump)
1086 	fprintf (dump_file, "      max: function is external\n");
1087       return sreal::max ();
1088     }
1089   /* When a profile is available, compute badness as:
1090 
1091                  time_saved * caller_count
1092      goodness =  -------------------------------------------------
1093 	         growth_of_caller * overall_growth * combined_size
1094 
1095      badness = - goodness
1096 
1097      Again, use a negative value to make calls with a profile appear hotter
1098      than calls without.
1099   */
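  /* Illustrative numbers only: an edge saving 2 time units in a caller
     counted 1000 times, with edge growth 10, callee overall growth 20
     (squared to 400 below) and caller self size plus growth of 100, gets
     badness = -(2 * 1000) / (10 * 400 * 100) = -0.005.  Edges that save more
     time per unit of size added are therefore more negative and are
     extracted from the heap first.  */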
1100   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1101 	   || caller->count.ipa ().nonzero_p ())
1102     {
1103       sreal numerator, denominator;
1104       int overall_growth;
1105       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1106 
1107       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1108 		   - inlined_time);
1109       if (numerator <= 0)
1110 	numerator = ((sreal) 1 >> 8);
1111       if (caller->count.ipa ().nonzero_p ())
1112 	numerator *= caller->count.ipa ().to_gcov_type ();
1113       else if (caller->count.ipa ().initialized_p ())
1114 	numerator = numerator >> 11;
1115       denominator = growth;
1116 
1117       overall_growth = callee_info->growth;
1118 
1119       /* Look for inliner wrappers of the form:
1120 
1121 	 inline_caller ()
1122 	   {
1123 	     do_fast_job...
1124 	     if (need_more_work)
1125 	       noninline_callee ();
1126 	   }
1127 	 Without penalizing this case, we usually inline noninline_callee
1128 	 into the inline_caller because overall_growth is small, preventing
1129 	 further inlining of inline_caller.
1130 
1131 	 Penalize only callgraph edges to functions with small overall
1132 	 growth ...
1133 	*/
1134       if (growth > overall_growth
1135 	  /* ... and having only one caller which is not inlined ... */
1136 	  && callee_info->single_caller
1137 	  && !edge->caller->global.inlined_to
1138 	  /* ... and edges executed only conditionally ... */
1139 	  && edge->sreal_frequency () < 1
1140 	  /* ... consider case where callee is not inline but caller is ... */
1141 	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1142 	       && DECL_DECLARED_INLINE_P (caller->decl))
1143 	      /* ... or when early optimizers decided to split and edge
1144 		 frequency still indicates splitting is a win ... */
1145 	      || (callee->split_part && !caller->split_part
1146 		  && edge->sreal_frequency () * 100
1147 		     < PARAM_VALUE
1148 			  (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1149 		  /* ... and do not overwrite user specified hints.   */
1150 		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1151 		      || DECL_DECLARED_INLINE_P (caller->decl)))))
1152 	{
1153 	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1154 	  int caller_growth = caller_info->growth;
1155 
1156 	  /* Only apply the penalty when the caller looks like an inline
1157 	     candidate and is not called once.  */
1158 	  if (!caller_info->single_caller && overall_growth < caller_growth
1159 	      && caller_info->inlinable
1160 	      && caller_info->size
1161 		 < (DECL_DECLARED_INLINE_P (caller->decl)
1162 		    ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1163 	    {
1164 	      if (dump)
1165 		fprintf (dump_file,
1166 			 "     Wrapper penalty. Increasing growth %i to %i\n",
1167 			 overall_growth, caller_growth);
1168 	      overall_growth = caller_growth;
1169 	    }
1170 	}
1171       if (overall_growth > 0)
1172         {
1173 	  /* Strongly prefer functions with few callers that can be inlined
1174 	     fully.  Squaring the growth here leads to smaller binaries on
1175 	     average.  Watch however for extreme cases and return to a linear
1176 	     function when growth is large.  */
1177 	  if (overall_growth < 256)
1178 	    overall_growth *= overall_growth;
1179 	  else
1180 	    overall_growth += 256 * 256 - 256;
1181 	  denominator *= overall_growth;
1182         }
1183       denominator *= ipa_fn_summaries->get (caller)->self_size + growth;
1184 
1185       badness = - numerator / denominator;
1186 
1187       if (dump)
1188 	{
1189 	  fprintf (dump_file,
1190 		   "      %f: guessed profile. frequency %f, count %" PRId64
1191 		   " caller count %" PRId64
1192 		   " time w/o inlining %f, time with inlining %f"
1193 		   " overall growth %i (current) %i (original)"
1194 		   " %i (compensated)\n",
1195 		   badness.to_double (),
1196 		   edge->sreal_frequency ().to_double (),
1197 		   edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1198 		   caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1199 		   compute_uninlined_call_time (edge,
1200 						unspec_edge_time).to_double (),
1201 		   inlined_time.to_double (),
1202 		   estimate_growth (callee),
1203 		   callee_info->growth, overall_growth);
1204 	}
1205     }
1206   /* When the function local profile is not available or it does not give
1207      useful information (i.e. the frequency is zero), base the cost on
1208      loop nest and overall size growth, so we optimize for the overall number
1209      of functions fully inlined in the program.  */
1210   else
1211     {
1212       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1213       badness = growth;
1214 
1215       /* Decrease badness if call is nested.  */
1216       if (badness > 0)
1217 	badness = badness >> nest;
1218       else
1219 	badness = badness << nest;
1220       if (dump)
1221 	fprintf (dump_file, "      %f: no profile. nest %i\n",
1222 		 badness.to_double (), nest);
1223     }
1224   gcc_checking_assert (badness != 0);
1225 
1226   if (edge->recursive_p ())
1227     badness = badness.shift (badness > 0 ? 4 : -4);
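  /* Note (illustrative): the four-bit shift scales the badness of a recursive
     edge sixteen-fold in whichever direction makes it less attractive, so
     recursive edges are strongly deprioritized in the min-heap ordering.  */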
1228   if ((hints & (INLINE_HINT_indirect_call
1229 		| INLINE_HINT_loop_iterations
1230 		| INLINE_HINT_array_index
1231 		| INLINE_HINT_loop_stride))
1232       || callee_info->growth <= 0)
1233     badness = badness.shift (badness > 0 ? -2 : 2);
1234   if (hints & (INLINE_HINT_same_scc))
1235     badness = badness.shift (badness > 0 ? 3 : -3);
1236   else if (hints & (INLINE_HINT_in_scc))
1237     badness = badness.shift (badness > 0 ? 2 : -2);
1238   else if (hints & (INLINE_HINT_cross_module))
1239     badness = badness.shift (badness > 0 ? 1 : -1);
1240   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1241     badness = badness.shift (badness > 0 ? -4 : 4);
1242   else if ((hints & INLINE_HINT_declared_inline))
1243     badness = badness.shift (badness > 0 ? -3 : 3);
1244   if (dump)
1245     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1246   return badness;
1247 }
1248 
1249 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1250 static inline void
1251 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1252 {
1253   sreal badness = edge_badness (edge, false);
1254   if (edge->aux)
1255     {
1256       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1257       gcc_checking_assert (n->get_data () == edge);
1258 
1259       /* fibonacci_heap::replace_key does busy updating of the
1260 	 heap that is unnecessarily expensive.
1261 	 We do lazy increases: after extracting the minimum, if the key
1262 	 turns out to be out of date, the edge is re-inserted into the heap
1263 	 with the correct value.  */
1264       if (badness < n->get_key ())
1265 	{
1266 	  if (dump_file && (dump_flags & TDF_DETAILS))
1267 	    {
1268 	      fprintf (dump_file,
1269 		       "  decreasing badness %s -> %s, %f to %f\n",
1270 		       edge->caller->dump_name (),
1271 		       edge->callee->dump_name (),
1272 		       n->get_key ().to_double (),
1273 		       badness.to_double ());
1274 	    }
1275 	  heap->decrease_key (n, badness);
1276 	}
1277     }
1278   else
1279     {
1280        if (dump_file && (dump_flags & TDF_DETAILS))
1281 	 {
1282 	   fprintf (dump_file,
1283 		    "  enqueuing call %s -> %s, badness %f\n",
1284 		    edge->caller->dump_name (),
1285 		    edge->callee->dump_name (),
1286 		    badness.to_double ());
1287 	 }
1288       edge->aux = heap->insert (badness, edge);
1289     }
1290 }
1291 
1292 
1293 /* NODE was inlined.
1294    All caller edges need to be reset because
1295    size estimates change.  Similarly callees need to be reset
1296    because a better context may be known.  */
1297 
1298 static void
1299 reset_edge_caches (struct cgraph_node *node)
1300 {
1301   struct cgraph_edge *edge;
1302   struct cgraph_edge *e = node->callees;
1303   struct cgraph_node *where = node;
1304   struct ipa_ref *ref;
1305 
1306   if (where->global.inlined_to)
1307     where = where->global.inlined_to;
1308 
1309   if (edge_growth_cache != NULL)
1310     for (edge = where->callers; edge; edge = edge->next_caller)
1311       if (edge->inline_failed)
1312 	edge_growth_cache->remove (edge);
1313 
1314   FOR_EACH_ALIAS (where, ref)
1315     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1316 
1317   if (!e)
1318     return;
1319 
1320   while (true)
1321     if (!e->inline_failed && e->callee->callees)
1322       e = e->callee->callees;
1323     else
1324       {
1325 	if (edge_growth_cache != NULL && e->inline_failed)
1326 	  edge_growth_cache->remove (e);
1327 	if (e->next_callee)
1328 	  e = e->next_callee;
1329 	else
1330 	  {
1331 	    do
1332 	      {
1333 		if (e->caller == node)
1334 		  return;
1335 		e = e->caller->callers;
1336 	      }
1337 	    while (!e->next_callee);
1338 	    e = e->next_callee;
1339 	  }
1340       }
1341 }
1342 
1343 /* Recompute HEAP nodes for each caller of NODE.
1344    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1345    When CHECK_INLINABLITY_FOR is set, re-check only the specified edge for
1346    inlinability.  Otherwise check all edges.  */
1347 
1348 static void
1349 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1350 		    bitmap updated_nodes,
1351 		    struct cgraph_edge *check_inlinablity_for)
1352 {
1353   struct cgraph_edge *edge;
1354   struct ipa_ref *ref;
1355 
1356   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1357       || node->global.inlined_to)
1358     return;
1359   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1360     return;
1361 
1362   FOR_EACH_ALIAS (node, ref)
1363     {
1364       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1365       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1366     }
1367 
1368   for (edge = node->callers; edge; edge = edge->next_caller)
1369     if (edge->inline_failed)
1370       {
1371         if (!check_inlinablity_for
1372 	    || check_inlinablity_for == edge)
1373 	  {
1374 	    if (can_inline_edge_p (edge, false)
1375 		&& want_inline_small_function_p (edge, false)
1376 		&& can_inline_edge_by_limits_p (edge, false))
1377 	      update_edge_key (heap, edge);
1378 	    else if (edge->aux)
1379 	      {
1380 		report_inline_failed_reason (edge);
1381 		heap->delete_node ((edge_heap_node_t *) edge->aux);
1382 		edge->aux = NULL;
1383 	      }
1384 	  }
1385 	else if (edge->aux)
1386 	  update_edge_key (heap, edge);
1387       }
1388 }
1389 
1390 /* Recompute HEAP nodes for each uninlined call in NODE.
1391    This is used when we know that edge badnesses are only going to increase
1392    (we introduced a new call site) and thus all we need to do is insert the
1393    newly created edges into the heap.  */
1394 
1395 static void
1396 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1397 		    bitmap updated_nodes)
1398 {
1399   struct cgraph_edge *e = node->callees;
1400 
1401   if (!e)
1402     return;
1403   while (true)
1404     if (!e->inline_failed && e->callee->callees)
1405       e = e->callee->callees;
1406     else
1407       {
1408 	enum availability avail;
1409 	struct cgraph_node *callee;
1410 	/* We do not reset the callee growth cache here.  Since we added a new
1411 	   call, growth could only have increased and consequently the badness
1412            metric doesn't need updating.  */
1413 	if (e->inline_failed
1414 	    && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1415 	    && ipa_fn_summaries->get (callee) != NULL
1416 	    && ipa_fn_summaries->get (callee)->inlinable
1417 	    && avail >= AVAIL_AVAILABLE
1418 	    && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1419 	  {
1420 	    if (can_inline_edge_p (e, false)
1421 		&& want_inline_small_function_p (e, false)
1422 		&& can_inline_edge_by_limits_p (e, false))
1423 	      update_edge_key (heap, e);
1424 	    else if (e->aux)
1425 	      {
1426 		report_inline_failed_reason (e);
1427 		heap->delete_node ((edge_heap_node_t *) e->aux);
1428 		e->aux = NULL;
1429 	      }
1430 	  }
1431 	if (e->next_callee)
1432 	  e = e->next_callee;
1433 	else
1434 	  {
1435 	    do
1436 	      {
1437 		if (e->caller == node)
1438 		  return;
1439 		e = e->caller->callers;
1440 	      }
1441 	    while (!e->next_callee);
1442 	    e = e->next_callee;
1443 	  }
1444       }
1445 }
1446 
1447 /* Enqueue all recursive calls from NODE into the priority queue, keyed by
1448    how much we want to recursively inline each call.  */
1449 
1450 static void
1451 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1452 			edge_heap_t *heap)
1453 {
1454   struct cgraph_edge *e;
1455   enum availability avail;
1456 
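  /* The heap key is the negated call frequency, so extract_min in the caller
     hands back the most frequently executed recursive call sites first.  */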
1457   for (e = where->callees; e; e = e->next_callee)
1458     if (e->callee == node
1459 	|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
1460 	    && avail > AVAIL_INTERPOSABLE))
1461       heap->insert (-e->sreal_frequency (), e);
1462   for (e = where->callees; e; e = e->next_callee)
1463     if (!e->inline_failed)
1464       lookup_recursive_calls (node, e->callee, heap);
1465 }
1466 
1467 /* Decide on recursive inlining: if the function has recursive calls, inline
1468    them until the body size reaches the allowed limit.  If any new indirect
1469    edges are discovered in the process, add them to *NEW_EDGES, unless
1470    NEW_EDGES is NULL.  */
1471 
1472 static bool
1473 recursive_inlining (struct cgraph_edge *edge,
1474 		    vec<cgraph_edge *> *new_edges)
1475 {
1476   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1477   edge_heap_t heap (sreal::min ());
1478   struct cgraph_node *node;
1479   struct cgraph_edge *e;
1480   struct cgraph_node *master_clone = NULL, *next;
1481   int depth = 0;
1482   int n = 0;
1483 
1484   node = edge->caller;
1485   if (node->global.inlined_to)
1486     node = node->global.inlined_to;
1487 
1488   if (DECL_DECLARED_INLINE_P (node->decl))
1489     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1490 
1491   /* Make sure that function is small enough to be considered for inlining.  */
1492   if (estimate_size_after_inlining (node, edge)  >= limit)
1493     return false;
1494   lookup_recursive_calls (node, node, &heap);
1495   if (heap.empty ())
1496     return false;
1497 
1498   if (dump_file)
1499     fprintf (dump_file,
1500 	     "  Performing recursive inlining on %s\n",
1501 	     node->name ());
1502 
1503   /* Do the inlining and update the list of recursive calls as we go.  */
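  /* Each iteration takes the most frequent remaining recursive call,
     redirects it to the unmodified master clone (so growth estimates are not
     skewed by the already modified body), re-checks the size limit and the
     recursion-depth heuristics, inlines it, and pushes any recursive calls
     newly exposed by that inlining back onto the heap.  */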
1504   while (!heap.empty ())
1505     {
1506       struct cgraph_edge *curr = heap.extract_min ();
1507       struct cgraph_node *cnode, *dest = curr->callee;
1508 
1509       if (!can_inline_edge_p (curr, true)
1510 	  || !can_inline_edge_by_limits_p (curr, true))
1511 	continue;
1512 
1513       /* MASTER_CLONE is produced when we have already started modifying
1514 	 the function.  Be sure to redirect the edge to the original body before
1515 	 estimating growths; otherwise we would be seeing growths after inlining
1516 	 the already modified body.  */
1517       if (master_clone)
1518 	{
1519 	  curr->redirect_callee (master_clone);
1520 	  if (edge_growth_cache != NULL)
1521 	    edge_growth_cache->remove (curr);
1522 	}
1523 
1524       if (estimate_size_after_inlining (node, curr) > limit)
1525 	{
1526 	  curr->redirect_callee (dest);
1527 	  if (edge_growth_cache != NULL)
1528 	    edge_growth_cache->remove (curr);
1529 	  break;
1530 	}
1531 
1532       depth = 1;
1533       for (cnode = curr->caller;
1534 	   cnode->global.inlined_to; cnode = cnode->callers->caller)
1535 	if (node->decl
1536 	    == curr->callee->ultimate_alias_target ()->decl)
1537           depth++;
1538 
1539       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1540 	{
1541 	  curr->redirect_callee (dest);
1542 	  if (edge_growth_cache != NULL)
1543 	    edge_growth_cache->remove (curr);
1544 	  continue;
1545 	}
1546 
1547       if (dump_file)
1548 	{
1549 	  fprintf (dump_file,
1550 		   "   Inlining call of depth %i", depth);
1551 	  if (node->count.nonzero_p ())
1552 	    {
1553 	      fprintf (dump_file, " called approx. %.2f times per call",
1554 		       (double)curr->count.to_gcov_type ()
1555 		       / node->count.to_gcov_type ());
1556 	    }
1557 	  fprintf (dump_file, "\n");
1558 	}
1559       if (!master_clone)
1560 	{
1561 	  /* We need the original clone to copy around.  */
1562 	  master_clone = node->create_clone (node->decl, node->count,
1563 	    false, vNULL, true, NULL, NULL);
1564 	  for (e = master_clone->callees; e; e = e->next_callee)
1565 	    if (!e->inline_failed)
1566 	      clone_inlined_nodes (e, true, false, NULL);
1567 	  curr->redirect_callee (master_clone);
1568 	  if (edge_growth_cache != NULL)
1569 	    edge_growth_cache->remove (curr);
1570 	}
1571 
1572       inline_call (curr, false, new_edges, &overall_size, true);
1573       lookup_recursive_calls (node, curr->callee, &heap);
1574       n++;
1575     }
1576 
1577   if (!heap.empty () && dump_file)
1578     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1579 
1580   if (!master_clone)
1581     return false;
1582 
1583   if (dump_enabled_p ())
1584     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1585 		     "\n   Inlined %i times, "
1586 		     "body grown from size %i to %i, time %f to %f\n", n,
1587 		     ipa_fn_summaries->get (master_clone)->size,
1588 		     ipa_fn_summaries->get (node)->size,
1589 		     ipa_fn_summaries->get (master_clone)->time.to_double (),
1590 		     ipa_fn_summaries->get (node)->time.to_double ());
1591 
1592   /* Remove the master clone we used for inlining.  We rely on the fact that
1593      clones inlined into the master clone get queued just before the master
1594      clone, so we don't need recursion.  */
1595   for (node = symtab->first_function (); node != master_clone;
1596        node = next)
1597     {
1598       next = symtab->next_function (node);
1599       if (node->global.inlined_to == master_clone)
1600 	node->remove ();
1601     }
1602   master_clone->remove ();
1603   return true;
1604 }
1605 
1606 
1607 /* Given whole compilation unit estimate of INSNS, compute how large we can
1608    allow the unit to grow.  */
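/* For illustration only (parameter defaults differ between releases): with
   --param inline-unit-growth=30, a unit of 100000 insns may grow to
   100000 * (100 + 30) / 100 = 130000 insns, while a unit smaller than
   --param large-unit-insns is first rounded up to that value, giving small
   units proportionally more headroom.  */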
1609 
1610 static int
1611 compute_max_insns (int insns)
1612 {
1613   int max_insns = insns;
1614   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1615     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1616 
1617   return ((int64_t) max_insns
1618 	  * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1619 }
1620 
1621 
1622 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1623 
1624 static void
1625 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1626 {
1627   while (new_edges.length () > 0)
1628     {
1629       struct cgraph_edge *edge = new_edges.pop ();
1630 
1631       gcc_assert (!edge->aux);
1632       if (edge->inline_failed
1633 	  && can_inline_edge_p (edge, true)
1634 	  && want_inline_small_function_p (edge, true)
1635 	  && can_inline_edge_by_limits_p (edge, true))
1636         edge->aux = heap->insert (edge_badness (edge, false), edge);
1637     }
1638 }
1639 
1640 /* Remove EDGE from the fibheap.  */
1641 
1642 static void
1643 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1644 {
1645   if (e->aux)
1646     {
1647       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1648       e->aux = NULL;
1649     }
1650 }
1651 
1652 /* Return true if speculation of edge E seems useful.
1653    If ANTICIPATE_INLINING is true, be conservative and hope that E
1654    may get inlined.  */
1655 
1656 bool
1657 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1658 {
1659   enum availability avail;
1660   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1661 								 e->caller);
1662   struct cgraph_edge *direct, *indirect;
1663   struct ipa_ref *ref;
1664 
1665   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1666 
1667   if (!e->maybe_hot_p ())
1668     return false;
1669 
1670   /* See if IP optimizations found something potentially useful about the
1671      function.  For now we look only for CONST/PURE flags.  Almost everything
1672      else we propagate is useless.  */
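  /* For example, if the speculative direct target is known to be const or
     pure while the fallback indirect call is not, keeping the speculation
     pays off even when the call is never inlined, because later passes can
     CSE or move the direct call.  */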
1673   if (avail >= AVAIL_AVAILABLE)
1674     {
1675       int ecf_flags = flags_from_decl_or_type (target->decl);
1676       if (ecf_flags & ECF_CONST)
1677         {
1678 	  e->speculative_call_info (direct, indirect, ref);
1679 	  if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1680 	    return true;
1681         }
1682       else if (ecf_flags & ECF_PURE)
1683         {
1684 	  e->speculative_call_info (direct, indirect, ref);
1685 	  if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1686 	    return true;
1687         }
1688     }
1689   /* If we did not manage to inline the function nor redirect
1690      to an ipa-cp clone (these are recognized by the local flag being set),
1691      the speculation is probably pointless unless the hardware is missing
1692      an indirect call predictor.  */
1693   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1694     return false;
1695   /* For overwritable targets there is not much to do.  */
1696   if (e->inline_failed
1697       && (!can_inline_edge_p (e, false)
1698 	  || !can_inline_edge_by_limits_p (e, false, true)))
1699     return false;
1700   /* OK, speculation seems interesting.  */
1701   return true;
1702 }
1703 
1704 /* We know that EDGE is not going to be inlined.
1705    See if we can remove speculation.  */
1706 
1707 static void
1708 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1709 {
1710   if (edge->speculative && !speculation_useful_p (edge, false))
1711     {
1712       struct cgraph_node *node = edge->caller;
1713       struct cgraph_node *where = node->global.inlined_to
1714 				  ? node->global.inlined_to : node;
1715       auto_bitmap updated_nodes;
1716 
1717       if (edge->count.ipa ().initialized_p ())
1718         spec_rem += edge->count.ipa ();
1719       edge->resolve_speculation ();
1720       reset_edge_caches (where);
1721       ipa_update_overall_fn_summary (where);
1722       update_caller_keys (edge_heap, where,
1723 			  updated_nodes, NULL);
1724       update_callee_keys (edge_heap, where,
1725 			  updated_nodes);
1726     }
1727 }
1728 
1729 /* Return true if NODE should be accounted for in the overall size estimate.
1730    Skip all nodes optimized for size so we can measure the growth of the hot
1731    part of the program regardless of the cold padding.  */
1732 
1733 bool
1734 inline_account_function_p (struct cgraph_node *node)
1735 {
1736    return (!DECL_EXTERNAL (node->decl)
1737 	   && !opt_for_fn (node->decl, optimize_size)
1738 	   && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1739 }
1740 
1741 /* Count the number of callers of NODE and store it into DATA (which
1742    points to an int).  Worker for cgraph_for_node_and_aliases.  */
1743 
1744 static bool
1745 sum_callers (struct cgraph_node *node, void *data)
1746 {
1747   struct cgraph_edge *e;
1748   int *num_calls = (int *)data;
1749 
1750   for (e = node->callers; e; e = e->next_caller)
1751     (*num_calls)++;
1752   return false;
1753 }
1754 
1755 /* We use a greedy algorithm for inlining small functions:
1756    all inline candidates are put into a prioritized heap ordered by
1757    increasing badness.
1758 
1759    The inlining of small functions is bounded by the unit growth parameters.  */
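/* Roughly, the phases below are: (1) compute per-function sizes, growth
   estimates and SCC membership, (2) seed the badness heap with every
   candidate edge, and (3) run the greedy loop while the overall unit size
   stays under the cap computed by compute_max_insns.  */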
1760 
1761 static void
1762 inline_small_functions (void)
1763 {
1764   struct cgraph_node *node;
1765   struct cgraph_edge *edge;
1766   edge_heap_t edge_heap (sreal::min ());
1767   auto_bitmap updated_nodes;
1768   int min_size, max_size;
1769   auto_vec<cgraph_edge *> new_indirect_edges;
1770   int initial_size = 0;
1771   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1772   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1773   new_indirect_edges.create (8);
1774 
1775   edge_removal_hook_holder
1776     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1777 
1778   /* Compute overall unit size and other global parameters used by badness
1779      metrics.  */
1780 
1781   max_count = profile_count::uninitialized ();
1782   ipa_reduced_postorder (order, true, NULL);
1783   free (order);
1784 
1785   FOR_EACH_DEFINED_FUNCTION (node)
1786     if (!node->global.inlined_to)
1787       {
1788 	if (!node->alias && node->analyzed
1789 	    && (node->has_gimple_body_p () || node->thunk.thunk_p)
1790 	    && opt_for_fn (node->decl, optimize))
1791 	  {
1792 	    struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1793 	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1794 
1795 	    /* Do not account external functions, they will be optimized out
1796 	       if not inlined.  Also only count the non-cold portion of the program.  */
1797 	    if (inline_account_function_p (node))
1798 	      initial_size += info->size;
1799 	    info->growth = estimate_growth (node);
1800 
1801 	    int num_calls = 0;
1802 	    node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1803 					       true);
1804 	    if (num_calls == 1)
1805 	      info->single_caller = true;
1806 	    if (dfs && dfs->next_cycle)
1807 	      {
1808 		struct cgraph_node *n2;
1809 		int id = dfs->scc_no + 1;
1810 		for (n2 = node; n2;
1811 		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1812 		  if (opt_for_fn (n2->decl, optimize))
1813 		    {
1814 		      ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1815 		      if (info2->scc_no)
1816 			break;
1817 		      info2->scc_no = id;
1818 		    }
1819 	      }
1820 	  }
1821 
1822 	for (edge = node->callers; edge; edge = edge->next_caller)
1823 	  max_count = max_count.max (edge->count.ipa ());
1824       }
1825   ipa_free_postorder_info ();
1826   edge_growth_cache
1827     = new call_summary<edge_growth_cache_entry *> (symtab, false);
1828 
1829   if (dump_file)
1830     fprintf (dump_file,
1831 	     "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1832 	     initial_size);
1833 
1834   overall_size = initial_size;
1835   max_size = compute_max_insns (overall_size);
1836   min_size = overall_size;
1837 
1838   /* Populate the heap with all edges we might inline.  */
1839 
1840   FOR_EACH_DEFINED_FUNCTION (node)
1841     {
1842       bool update = false;
1843       struct cgraph_edge *next = NULL;
1844       bool has_speculative = false;
1845 
1846       if (!opt_for_fn (node->decl, optimize))
1847 	continue;
1848 
1849       if (dump_file)
1850 	fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1851 
1852       for (edge = node->callees; edge; edge = next)
1853 	{
1854 	  next = edge->next_callee;
1855 	  if (edge->inline_failed
1856 	      && !edge->aux
1857 	      && can_inline_edge_p (edge, true)
1858 	      && want_inline_small_function_p (edge, true)
1859 	      && can_inline_edge_by_limits_p (edge, true)
1860 	      && edge->inline_failed)
1861 	    {
1862 	      gcc_assert (!edge->aux);
1863 	      update_edge_key (&edge_heap, edge);
1864 	    }
1865 	  if (edge->speculative)
1866 	    has_speculative = true;
1867 	}
1868       if (has_speculative)
1869 	for (edge = node->callees; edge; edge = next)
1870 	  if (edge->speculative && !speculation_useful_p (edge,
1871 							  edge->aux != NULL))
1872 	    {
1873 	      edge->resolve_speculation ();
1874 	      update = true;
1875 	    }
1876       if (update)
1877 	{
1878 	  struct cgraph_node *where = node->global.inlined_to
1879 				      ? node->global.inlined_to : node;
1880 	  ipa_update_overall_fn_summary (where);
1881 	  reset_edge_caches (where);
1882           update_caller_keys (&edge_heap, where,
1883 			      updated_nodes, NULL);
1884           update_callee_keys (&edge_heap, where,
1885 			      updated_nodes);
1886           bitmap_clear (updated_nodes);
1887 	}
1888     }
1889 
1890   gcc_assert (in_lto_p
1891 	      || !(max_count > 0)
1892 	      || (profile_info && flag_branch_probabilities));
1893 
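  /* Main greedy loop: repeatedly take the edge with the best (smallest)
     badness key, re-validate it against the current state since the key may
     be stale, and either inline it or drop it from consideration, updating
     the keys of the edges affected by the change.  */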
1894   while (!edge_heap.empty ())
1895     {
1896       int old_size = overall_size;
1897       struct cgraph_node *where, *callee;
1898       sreal badness = edge_heap.min_key ();
1899       sreal current_badness;
1900       int growth;
1901 
1902       edge = edge_heap.extract_min ();
1903       gcc_assert (edge->aux);
1904       edge->aux = NULL;
1905       if (!edge->inline_failed || !edge->callee->analyzed)
1906 	continue;
1907 
1908 #if CHECKING_P
1909       /* Be sure that caches are maintained consistent.
1910 	 This check is affected by scaling roundoff errors when compiling for
1911 	 IPA, thus we skip it in that case.  */
1912       if (!edge->callee->count.ipa_p ()
1913 	  && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1914 	{
1915 	  sreal cached_badness = edge_badness (edge, false);
1916 
1917 	  int old_size_est = estimate_edge_size (edge);
1918 	  sreal old_time_est = estimate_edge_time (edge);
1919 	  int old_hints_est = estimate_edge_hints (edge);
1920 
1921 	  if (edge_growth_cache != NULL)
1922 	    edge_growth_cache->remove (edge);
1923 	  gcc_assert (old_size_est == estimate_edge_size (edge));
1924 	  gcc_assert (old_time_est == estimate_edge_time (edge));
1925 	  /* FIXME:
1926 
1927 	     gcc_assert (old_hints_est == estimate_edge_hints (edge));
1928 
1929 	     fails with profile feedback because some hints depend on the
1930 	     maybe_hot_edge_p predicate and, because the callee gets inlined into
1931 	     other calls, the edge may become cold.
1932 	     This ought to be fixed by computing relative probabilities
1933 	     for the given invocation, but that will be better done once the whole
1934 	     code is converted to sreals.  Disable for now and revert to the "wrong"
1935 	     value so the enable/disable checking paths agree.  */
1936 	  edge_growth_cache->get (edge)->hints = old_hints_est + 1;
1937 
1938 	  /* When updating the edge costs, we only decrease badness in the keys.
1939 	     Increases of badness are handled lazily; when we see a key with an
1940 	     out-of-date value on it, we re-insert it now.  */
1941 	  current_badness = edge_badness (edge, false);
1942 	  gcc_assert (cached_badness == current_badness);
1943 	  gcc_assert (current_badness >= badness);
1944 	}
1945       else
1946         current_badness = edge_badness (edge, false);
1947 #else
1948       current_badness = edge_badness (edge, false);
1949 #endif
1950       if (current_badness != badness)
1951 	{
1952 	  if (edge_heap.min () && current_badness > edge_heap.min_key ())
1953 	    {
1954 	      edge->aux = edge_heap.insert (current_badness, edge);
1955 	      continue;
1956 	    }
1957 	  else
1958 	    badness = current_badness;
1959 	}
1960 
1961       if (!can_inline_edge_p (edge, true)
1962 	  || !can_inline_edge_by_limits_p (edge, true))
1963 	{
1964 	  resolve_noninline_speculation (&edge_heap, edge);
1965 	  continue;
1966 	}
1967 
1968       callee = edge->callee->ultimate_alias_target ();
1969       growth = estimate_edge_growth (edge);
1970       if (dump_file)
1971 	{
1972 	  fprintf (dump_file,
1973 		   "\nConsidering %s with %i size\n",
1974 		   callee->dump_name (),
1975 		   ipa_fn_summaries->get (callee)->size);
1976 	  fprintf (dump_file,
1977 		   " to be inlined into %s in %s:%i\n"
1978 		   " Estimated badness is %f, frequency %.2f.\n",
1979 		   edge->caller->dump_name (),
1980 		   edge->call_stmt
1981 		   && (LOCATION_LOCUS (gimple_location ((const gimple *)
1982 							edge->call_stmt))
1983 		       > BUILTINS_LOCATION)
1984 		   ? gimple_filename ((const gimple *) edge->call_stmt)
1985 		   : "unknown",
1986 		   edge->call_stmt
1987 		   ? gimple_lineno ((const gimple *) edge->call_stmt)
1988 		   : -1,
1989 		   badness.to_double (),
1990 		   edge->sreal_frequency ().to_double ());
1991 	  if (edge->count.ipa ().initialized_p ())
1992 	    {
1993 	      fprintf (dump_file, " Called ");
1994 	      edge->count.ipa ().dump (dump_file);
1995 	      fprintf (dump_file, " times\n");
1996             }
1997 	  if (dump_flags & TDF_DETAILS)
1998 	    edge_badness (edge, true);
1999 	}
2000 
2001       if (overall_size + growth > max_size
2002 	  && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2003 	{
2004 	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2005 	  report_inline_failed_reason (edge);
2006 	  resolve_noninline_speculation (&edge_heap, edge);
2007 	  continue;
2008 	}
2009 
2010       if (!want_inline_small_function_p (edge, true))
2011 	{
2012 	  resolve_noninline_speculation (&edge_heap, edge);
2013 	  continue;
2014 	}
2015 
2016       /* Heuristics for inlining small functions work poorly for
2017 	 recursive calls, where we achieve an effect similar to loop unrolling.
2018 	 When inlining such an edge seems profitable, leave the decision to
2019 	 the special-purpose recursive inliner.  */
2020       if (edge->recursive_p ())
2021 	{
2022 	  where = edge->caller;
2023 	  if (where->global.inlined_to)
2024 	    where = where->global.inlined_to;
2025 	  if (!recursive_inlining (edge,
2026 				   opt_for_fn (edge->caller->decl,
2027 					       flag_indirect_inlining)
2028 				   ? &new_indirect_edges : NULL))
2029 	    {
2030 	      edge->inline_failed = CIF_RECURSIVE_INLINING;
2031 	      resolve_noninline_speculation (&edge_heap, edge);
2032 	      continue;
2033 	    }
2034 	  reset_edge_caches (where);
2035 	  /* The recursive inliner inlines all recursive calls of the function
2036 	     at once.  Consequently we need to update all callee keys.  */
2037 	  if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2038 	    add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2039           update_callee_keys (&edge_heap, where, updated_nodes);
2040 	  bitmap_clear (updated_nodes);
2041 	}
2042       else
2043 	{
2044 	  struct cgraph_node *outer_node = NULL;
2045 	  int depth = 0;
2046 
2047 	  /* Consider the case where a self-recursive function A is inlined
2048 	     into B.  This is a desired optimization in some cases, since it
2049 	     leads to an effect similar to loop peeling and we might completely
2050 	     optimize out the recursive call.  However we must be extra
2051 	     selective.  */
2052 
2053 	  where = edge->caller;
2054 	  while (where->global.inlined_to)
2055 	    {
2056 	      if (where->decl == callee->decl)
2057 		outer_node = where, depth++;
2058 	      where = where->callers->caller;
2059 	    }
2060 	  if (outer_node
2061 	      && !want_inline_self_recursive_call_p (edge, outer_node,
2062 						     true, depth))
2063 	    {
2064 	      edge->inline_failed
2065 		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2066 		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2067 	      resolve_noninline_speculation (&edge_heap, edge);
2068 	      continue;
2069 	    }
2070 	  else if (depth && dump_file)
2071 	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2072 
2073 	  gcc_checking_assert (!callee->global.inlined_to);
2074 	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2075 	  add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2076 
2077 	  reset_edge_caches (edge->callee);
2078 
2079 	  update_callee_keys (&edge_heap, where, updated_nodes);
2080 	}
2081       where = edge->caller;
2082       if (where->global.inlined_to)
2083 	where = where->global.inlined_to;
2084 
2085       /* Our profitability metric can depend on local properties
2086 	 such as the number of inlinable calls and the size of the function body.
2087 	 After inlining these properties might change for the function we
2088 	 inlined into (since its body size changed) and for the functions
2089 	 called by the function we inlined (since the number of their inlinable
2090 	 callers might change).  */
2091       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2092       /* Offline copy count has possibly changed, recompute if profile is
2093 	 available.  */
2094       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2095       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2096 	update_callee_keys (&edge_heap, n, updated_nodes);
2097       bitmap_clear (updated_nodes);
2098 
2099       if (dump_enabled_p ())
2100 	{
2101 	  ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller);
2102 
2103 	  /* dump_printf can't handle %+i.  */
2104 	  char buf_net_change[100];
2105 	  snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2106 		    overall_size - old_size);
2107 
2108 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2109 			   " Inlined %C into %C which now has time %f and "
2110 			   "size %i, net change of %s.\n",
2111 			   edge->callee, edge->caller,
2112 			   s->time.to_double (), s->size, buf_net_change);
2113 	}
2114       if (min_size > overall_size)
2115 	{
2116 	  min_size = overall_size;
2117 	  max_size = compute_max_insns (min_size);
2118 
2119 	  if (dump_file)
2120 	    fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2121 	}
2122     }
2123 
2124   free_growth_caches ();
2125   if (dump_enabled_p ())
2126     dump_printf (MSG_NOTE,
2127 		 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2128 		 initial_size, overall_size,
2129 		 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2130   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2131 }
2132 
2133 /* Flatten NODE.  Performed both during early inlining and
2134    at IPA inlining time.  */
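/* In outline (a sketch of the intent rather than the exact control flow):
   walk all call edges of NODE, inline every callee that may be inlined and
   recurse into the freshly inlined bodies, giving up on cycles, recursive
   calls and callees whose SSA state does not match.  */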
2135 
2136 static void
2137 flatten_function (struct cgraph_node *node, bool early, bool update)
2138 {
2139   struct cgraph_edge *e;
2140 
2141   /* We shouldn't be called recursively when we are being processed.  */
2142   gcc_assert (node->aux == NULL);
2143 
2144   node->aux = (void *) node;
2145 
2146   for (e = node->callees; e; e = e->next_callee)
2147     {
2148       struct cgraph_node *orig_callee;
2149       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2150 
2151       /* Have we hit a cycle?  Then it is time to give up.  */
2152       if (callee->aux)
2153 	{
2154 	  if (dump_enabled_p ())
2155 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2156 			     "Not inlining %C into %C to avoid cycle.\n",
2157 			     callee, e->caller);
2158 	  if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2159 	    e->inline_failed = CIF_RECURSIVE_INLINING;
2160 	  continue;
2161 	}
2162 
2163       /* When the edge is already inlined, we just need to recurse into
2164 	 it in order to fully flatten the leaves.  */
2165       if (!e->inline_failed)
2166 	{
2167 	  flatten_function (callee, early, false);
2168 	  continue;
2169 	}
2170 
2171       /* The flatten attribute needs to be processed during late inlining.  For
2172 	 extra code quality, however, we also do flattening during early
2173 	 optimization.  */
2174       if (!early
2175 	  ? !can_inline_edge_p (e, true)
2176 	    && !can_inline_edge_by_limits_p (e, true)
2177 	  : !can_early_inline_edge_p (e))
2178 	continue;
2179 
2180       if (e->recursive_p ())
2181 	{
2182 	  if (dump_enabled_p ())
2183 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2184 			     "Not inlining: recursive call.\n");
2185 	  continue;
2186 	}
2187 
2188       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2189 	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2190 	{
2191 	  if (dump_enabled_p ())
2192 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2193 			     "Not inlining: SSA form does not match.\n");
2194 	  continue;
2195 	}
2196 
2197       /* Inline the edge and flatten the inline clone.  Avoid
2198          recursing through the original node if the node was cloned.  */
2199       if (dump_enabled_p ())
2200 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2201 			 " Inlining %C into %C.\n",
2202 			 callee, e->caller);
2203       orig_callee = callee;
2204       inline_call (e, true, NULL, NULL, false);
2205       if (e->callee != orig_callee)
2206 	orig_callee->aux = (void *) node;
2207       flatten_function (e->callee, early, false);
2208       if (e->callee != orig_callee)
2209 	orig_callee->aux = NULL;
2210     }
2211 
2212   node->aux = NULL;
2213   if (update)
2214     ipa_update_overall_fn_summary (node->global.inlined_to
2215 				   ? node->global.inlined_to : node);
2216 }
2217 
2218 /* Inline NODE into all callers.  Worker for cgraph_for_node_and_aliases.
2219    DATA points to the number of calls originally found so we avoid infinite
2220    recursion.  */
2221 
2222 static bool
2223 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2224 			 hash_set<cgraph_node *> *callers)
2225 {
2226   int *num_calls = (int *)data;
2227   bool callee_removed = false;
2228 
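  /* NODE->callers always points at the next call that has not been inlined
     yet: after a successful inline_call the edge either moves to an inline
     clone of NODE or NODE itself becomes the clone (ending the loop), so the
     walk terminates once all original callers are handled or the budget in
     *NUM_CALLS runs out.  */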
2229   while (node->callers && !node->global.inlined_to)
2230     {
2231       struct cgraph_node *caller = node->callers->caller;
2232 
2233       if (!can_inline_edge_p (node->callers, true)
2234 	  || !can_inline_edge_by_limits_p (node->callers, true)
2235 	  || node->callers->recursive_p ())
2236 	{
2237 	  if (dump_file)
2238 	    fprintf (dump_file, "Uninlinable call found; giving up.\n");
2239 	  *num_calls = 0;
2240 	  return false;
2241 	}
2242 
2243       if (dump_file)
2244 	{
2245 	  cgraph_node *ultimate = node->ultimate_alias_target ();
2246 	  fprintf (dump_file,
2247 		   "\nInlining %s size %i.\n",
2248 		   ultimate->name (),
2249 		   ipa_fn_summaries->get (ultimate)->size);
2250 	  fprintf (dump_file,
2251 		   " Called once from %s %i insns.\n",
2252 		   node->callers->caller->name (),
2253 		   ipa_fn_summaries->get (node->callers->caller)->size);
2254 	}
2255 
2256       /* Remember which callers we inlined to, delaying updating the
2257 	 overall summary.  */
2258       callers->add (node->callers->caller);
2259       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2260       if (dump_file)
2261 	fprintf (dump_file,
2262 		 " Inlined into %s which now has %i size\n",
2263 		 caller->name (),
2264 		 ipa_fn_summaries->get (caller)->size);
2265       if (!(*num_calls)--)
2266 	{
2267 	  if (dump_file)
2268 	    fprintf (dump_file, "New calls found; giving up.\n");
2269 	  return callee_removed;
2270 	}
2271       if (callee_removed)
2272 	return true;
2273     }
2274   return false;
2275 }
2276 
2277 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2278    update.  */
2279 
2280 static bool
2281 inline_to_all_callers (struct cgraph_node *node, void *data)
2282 {
2283   hash_set<cgraph_node *> callers;
2284   bool res = inline_to_all_callers_1 (node, data, &callers);
2285   /* Perform the delayed update of the overall summary of all callers
2286      processed.  This avoids quadratic behavior in the cases where
2287      we have a lot of calls to the same function.  */
2288   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2289        i != callers.end (); ++i)
2290     ipa_update_overall_fn_summary (*i);
2291   return res;
2292 }
2293 
2294 /* Output overall time estimate.  */
2295 static void
2296 dump_overall_stats (void)
2297 {
2298   sreal sum_weighted = 0, sum = 0;
2299   struct cgraph_node *node;
2300 
2301   FOR_EACH_DEFINED_FUNCTION (node)
2302     if (!node->global.inlined_to
2303 	&& !node->alias)
2304       {
2305 	ipa_fn_summary *s = ipa_fn_summaries->get (node);
2306 	if (s != NULL)
2307 	  {
2308 	  sum += s->time;
2309 	  if (node->count.ipa ().initialized_p ())
2310 	    sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2311 	  }
2312       }
2313   fprintf (dump_file, "Overall time estimate: "
2314 	   "%f weighted by profile: "
2315 	   "%f\n", sum.to_double (), sum_weighted.to_double ());
2316 }
2317 
2318 /* Output some useful stats about inlining.  */
2319 
2320 static void
2321 dump_inline_stats (void)
2322 {
2323   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2324   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2325   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2326   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2327   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2328   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2329   int64_t reason[CIF_N_REASONS][2];
2330   sreal reason_freq[CIF_N_REASONS];
2331   int i;
2332   struct cgraph_node *node;
2333 
2334   memset (reason, 0, sizeof (reason));
2335   for (i=0; i < CIF_N_REASONS; i++)
2336     reason_freq[i] = 0;
2337   FOR_EACH_DEFINED_FUNCTION (node)
2338   {
2339     struct cgraph_edge *e;
2340     for (e = node->callees; e; e = e->next_callee)
2341       {
2342 	if (e->inline_failed)
2343 	  {
2344 	    if (e->count.ipa ().initialized_p ())
2345 	      reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2346 	    reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2347 	    reason[(int) e->inline_failed][1] ++;
2348 	    if (DECL_VIRTUAL_P (e->callee->decl)
2349 		&& e->count.ipa ().initialized_p ())
2350 	      {
2351 		if (e->indirect_inlining_edge)
2352 		  noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2353 		else
2354 		  noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2355 	      }
2356 	    else if (e->count.ipa ().initialized_p ())
2357 	      {
2358 		if (e->indirect_inlining_edge)
2359 		  noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2360 		else
2361 		  noninlined_cnt += e->count.ipa ().to_gcov_type ();
2362 	      }
2363 	  }
2364 	else if (e->count.ipa ().initialized_p ())
2365 	  {
2366 	    if (e->speculative)
2367 	      {
2368 		if (DECL_VIRTUAL_P (e->callee->decl))
2369 		  inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2370 		else
2371 		  inlined_speculative += e->count.ipa ().to_gcov_type ();
2372 	      }
2373 	    else if (DECL_VIRTUAL_P (e->callee->decl))
2374 	      {
2375 		if (e->indirect_inlining_edge)
2376 		  inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2377 		else
2378 		  inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2379 	      }
2380 	    else
2381 	      {
2382 		if (e->indirect_inlining_edge)
2383 		  inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2384 		else
2385 		  inlined_cnt += e->count.ipa ().to_gcov_type ();
2386 	      }
2387 	  }
2388       }
2389     for (e = node->indirect_calls; e; e = e->next_callee)
2390       if (e->indirect_info->polymorphic
2391 	  && e->count.ipa ().initialized_p ())
2392 	indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2393       else if (e->count.ipa ().initialized_p ())
2394 	indirect_cnt += e->count.ipa ().to_gcov_type ();
2395   }
2396   if (max_count.initialized_p ())
2397     {
2398       fprintf (dump_file,
2399 	       "Inlined %" PRId64 " + speculative "
2400 	       "%" PRId64 " + speculative polymorphic "
2401 	       "%" PRId64 " + previously indirect "
2402 	       "%" PRId64 " + virtual "
2403 	       "%" PRId64 " + virtual and previously indirect "
2404 	       "%" PRId64 "\n" "Not inlined "
2405 	       "%" PRId64 " + previously indirect "
2406 	       "%" PRId64 " + virtual "
2407 	       "%" PRId64 " + virtual and previously indirect "
2408 	       "%" PRId64 " + still indirect "
2409 	       "%" PRId64 " + still indirect polymorphic "
2410 	       "%" PRId64 "\n", inlined_cnt,
2411 	       inlined_speculative, inlined_speculative_ply,
2412 	       inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2413 	       noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2414 	       noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2415       fprintf (dump_file, "Removed speculations ");
2416       spec_rem.dump (dump_file);
2417       fprintf (dump_file, "\n");
2418     }
2419   dump_overall_stats ();
2420   fprintf (dump_file, "\nWhy inlining failed?\n");
2421   for (i = 0; i < CIF_N_REASONS; i++)
2422     if (reason[i][1])
2423       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2424 	       cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2425 	       (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2426 }
2427 
2428 /* Called when node is removed.  */
2429 
2430 static void
2431 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2432 {
2433   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2434     return;
2435 
2436   hash_set<struct cgraph_node *> *removed
2437     = (hash_set<struct cgraph_node *> *) data;
2438   removed->add (node);
2439 }
2440 
2441 /* Decide on the inlining.  We do so in topological order to avoid
2442    the expense of updating data structures.  */
2443 
2444 static unsigned int
2445 ipa_inline (void)
2446 {
2447   struct cgraph_node *node;
2448   int nnodes;
2449   struct cgraph_node **order;
2450   int i, j;
2451   int cold;
2452   bool remove_functions = false;
2453 
2454   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2455 
2456   if (dump_file)
2457     ipa_dump_fn_summaries (dump_file);
2458 
2459   nnodes = ipa_reverse_postorder (order);
2460   spec_rem = profile_count::zero ();
2461 
2462   FOR_EACH_FUNCTION (node)
2463     {
2464       node->aux = 0;
2465 
2466       /* Recompute the default reasons for inlining because they may have
2467 	 changed during merging.  */
2468       if (in_lto_p)
2469 	{
2470 	  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2471 	    {
2472 	      gcc_assert (e->inline_failed);
2473 	      initialize_inline_failed (e);
2474 	    }
2475 	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2476 	    initialize_inline_failed (e);
2477 	}
2478     }
2479 
2480   if (dump_file)
2481     fprintf (dump_file, "\nFlattening functions:\n");
2482 
2483   /* First shrink order array, so that it only contains nodes with
2484      flatten attribute.  */
2485   for (i = nnodes - 1, j = i; i >= 0; i--)
2486     {
2487       node = order[i];
2488       if (lookup_attribute ("flatten",
2489 			    DECL_ATTRIBUTES (node->decl)) != NULL)
2490 	order[j--] = order[i];
2491     }
2492 
2493   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2494      nodes with flatten attribute.  If there is more than one such
2495      node, we need to register a node removal hook, as flatten_function
2496      could remove other nodes with flatten attribute.  See PR82801.  */
2497   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2498   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2499   if (j < nnodes - 2)
2500     {
2501       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2502       node_removal_hook_holder
2503 	= symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2504 					   flatten_removed_nodes);
2505     }
2506 
2507   /* In the first pass handle functions to be flattened.  Do this with
2508      a priority so none of our later choices will make this impossible.  */
2509   for (i = nnodes - 1; i > j; i--)
2510     {
2511       node = order[i];
2512       if (flatten_removed_nodes
2513 	  && flatten_removed_nodes->contains (node))
2514 	continue;
2515 
2516       /* Handle nodes to be flattened.
2517 	 Ideally when processing callees we stop inlining at the
2518 	 entry of cycles, possibly cloning that entry point and
2519 	 trying to flatten it, turning it into a self-recursive
2520 	 function.  */
2521       if (dump_file)
2522 	fprintf (dump_file, "Flattening %s\n", node->name ());
2523       flatten_function (node, false, true);
2524     }
2525 
2526   if (j < nnodes - 2)
2527     {
2528       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2529       delete flatten_removed_nodes;
2530     }
2531   free (order);
2532 
2533   if (dump_file)
2534     dump_overall_stats ();
2535 
2536   inline_small_functions ();
2537 
2538   gcc_assert (symtab->state == IPA_SSA);
2539   symtab->state = IPA_SSA_AFTER_INLINING;
2540   /* Do first after-inlining removal.  We want to remove all "stale" extern
2541      inline functions and virtual functions so we really know what is called
2542      once.  */
2543   symtab->remove_unreachable_nodes (dump_file);
2544 
2545   /* Inline functions with the property that after inlining into all callers
2546      the code size will shrink because the out-of-line copy is eliminated.
2547      We do this regardless of the callee size as long as the function growth
2548      limits are met.  */
2549   if (dump_file)
2550     fprintf (dump_file,
2551 	     "\nDeciding on functions to be inlined into all callers and "
2552 	     "removing useless speculations:\n");
2553 
2554   /* Inlining one function called once has a good chance of preventing
2555      inlining of another function into the same callee.  Ideally we should
2556      work in priority order, but probably inlining hot functions first
2557      is a good cut without the extra pain of maintaining the queue.
2558 
2559      ??? This is not really fitting the bill perfectly: inlining a function
2560      into a callee often leads to better optimization of the callee due to
2561      increased context for optimization.
2562      For example, if main() calls a function that outputs help
2563      and then a function that does the main optimization, we should inline
2564      the second with priority even if both calls are cold by themselves.
2565 
2566      We probably want to implement a new predicate replacing our use of
2567      maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2568      to be hot.  */
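  /* The two iterations below differ only in the COLD argument given to
     want_inline_function_to_all_callers_p: the first pass is meant to pick up
     functions with at least one hot call site, the second also accepts the
     remaining cold ones.  */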
2569   for (cold = 0; cold <= 1; cold ++)
2570     {
2571       FOR_EACH_DEFINED_FUNCTION (node)
2572 	{
2573 	  struct cgraph_edge *edge, *next;
2574 	  bool update=false;
2575 
2576 	  if (!opt_for_fn (node->decl, optimize)
2577 	      || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2578 	    continue;
2579 
2580 	  for (edge = node->callees; edge; edge = next)
2581 	    {
2582 	      next = edge->next_callee;
2583 	      if (edge->speculative && !speculation_useful_p (edge, false))
2584 		{
2585 		  if (edge->count.ipa ().initialized_p ())
2586 		    spec_rem += edge->count.ipa ();
2587 		  edge->resolve_speculation ();
2588 		  update = true;
2589 		  remove_functions = true;
2590 		}
2591 	    }
2592 	  if (update)
2593 	    {
2594 	      struct cgraph_node *where = node->global.inlined_to
2595 					  ? node->global.inlined_to : node;
2596 	      reset_edge_caches (where);
2597 	      ipa_update_overall_fn_summary (where);
2598 	    }
2599 	  if (want_inline_function_to_all_callers_p (node, cold))
2600 	    {
2601 	      int num_calls = 0;
2602 	      node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2603 						 true);
2604 	      while (node->call_for_symbol_and_aliases
2605 		       (inline_to_all_callers, &num_calls, true))
2606 		;
2607 	      remove_functions = true;
2608 	    }
2609 	}
2610     }
2611 
2612   /* Free ipa-prop structures if they are no longer needed.  */
2613   ipa_free_all_structures_after_iinln ();
2614 
2615   if (dump_enabled_p ())
2616     dump_printf (MSG_NOTE,
2617 		 "\nInlined %i calls, eliminated %i functions\n\n",
2618 		 ncalls_inlined, nfunctions_inlined);
2619   if (dump_file)
2620     dump_inline_stats ();
2621 
2622   if (dump_file)
2623     ipa_dump_fn_summaries (dump_file);
2624   return remove_functions ? TODO_remove_functions : 0;
2625 }
2626 
2627 /* Inline always-inline function calls in NODE.  */
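/* These calls bypass the usual size limits.  A callee that carries the
   always_inline attribute but cannot be early-inlined still counts as
   inlined here, so that expand_call_inline in tree-inline.c gets a chance to
   diagnose it later.  */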
2628 
2629 static bool
2630 inline_always_inline_functions (struct cgraph_node *node)
2631 {
2632   struct cgraph_edge *e;
2633   bool inlined = false;
2634 
2635   for (e = node->callees; e; e = e->next_callee)
2636     {
2637       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2638       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2639 	continue;
2640 
2641       if (e->recursive_p ())
2642 	{
2643 	  if (dump_enabled_p ())
2644 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2645 			     "  Not inlining recursive call to %C.\n",
2646 			     e->callee);
2647 	  e->inline_failed = CIF_RECURSIVE_INLINING;
2648 	  continue;
2649 	}
2650 
2651       if (!can_early_inline_edge_p (e))
2652 	{
2653 	  /* Set inlined to true if the callee is marked "always_inline" but
2654 	     is not inlinable.  This will allow flagging an error later in
2655 	     expand_call_inline in tree-inline.c.  */
2656 	  if (lookup_attribute ("always_inline",
2657 				 DECL_ATTRIBUTES (callee->decl)) != NULL)
2658 	    inlined = true;
2659 	  continue;
2660 	}
2661 
2662       if (dump_enabled_p ())
2663 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2664 			 "  Inlining %C into %C (always_inline).\n",
2665 			 e->callee, e->caller);
2666       inline_call (e, true, NULL, NULL, false);
2667       inlined = true;
2668     }
2669   if (inlined)
2670     ipa_update_overall_fn_summary (node);
2671 
2672   return inlined;
2673 }
2674 
2675 /* Decide on the inlining.  We do so in topological order to avoid
2676    the expense of updating data structures.  */
2677 
2678 static bool
2679 early_inline_small_functions (struct cgraph_node *node)
2680 {
2681   struct cgraph_edge *e;
2682   bool inlined = false;
2683 
2684   for (e = node->callees; e; e = e->next_callee)
2685     {
2686       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2687 
2688       /* We can encounter a not-yet-analyzed function during
2689 	 early inlining on callgraphs with strongly
2690 	 connected components.  */
2691       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2692       if (s == NULL || !s->inlinable || !e->inline_failed)
2693 	continue;
2694 
2695       /* Do not consider functions not declared inline.  */
2696       if (!DECL_DECLARED_INLINE_P (callee->decl)
2697 	  && !opt_for_fn (node->decl, flag_inline_small_functions)
2698 	  && !opt_for_fn (node->decl, flag_inline_functions))
2699 	continue;
2700 
2701       if (dump_enabled_p ())
2702 	dump_printf_loc (MSG_NOTE, e->call_stmt,
2703 			 "Considering inline candidate %C.\n",
2704 			 callee);
2705 
2706       if (!can_early_inline_edge_p (e))
2707 	continue;
2708 
2709       if (e->recursive_p ())
2710 	{
2711 	  if (dump_enabled_p ())
2712 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2713 			     "  Not inlining: recursive call.\n");
2714 	  continue;
2715 	}
2716 
2717       if (!want_early_inline_function_p (e))
2718 	continue;
2719 
2720       if (dump_enabled_p ())
2721 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2722 			 " Inlining %C into %C.\n",
2723 			 callee, e->caller);
2724       inline_call (e, true, NULL, NULL, false);
2725       inlined = true;
2726     }
2727 
2728   if (inlined)
2729     ipa_update_overall_fn_summary (node);
2730 
2731   return inlined;
2732 }
2733 
2734 unsigned int
2735 early_inliner (function *fun)
2736 {
2737   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2738   struct cgraph_edge *edge;
2739   unsigned int todo = 0;
2740   int iterations = 0;
2741   bool inlined = false;
2742 
2743   if (seen_error ())
2744     return 0;
2745 
2746   /* Do nothing if the data structures for the ipa-inliner are already
2747      computed.  This happens when some pass decides to construct a new
2748      function and cgraph_add_new_function calls lowering passes and early
2749      optimization on it.  This may confuse us when the early inliner decides
2750      to inline a call to a function clone, because function clones don't have
2751      a parameter list in ipa-prop matching their signature.  */
2752   if (ipa_node_params_sum)
2753     return 0;
2754 
2755   if (flag_checking)
2756     node->verify ();
2757   node->remove_all_references ();
2758 
2759   /* Even when not optimizing or not inlining inline always-inline
2760      functions.  */
2761   inlined = inline_always_inline_functions (node);
2762 
2763   if (!optimize
2764       || flag_no_inline
2765       || !flag_early_inlining
2766       /* Never inline regular functions into always-inline functions
2767 	 during incremental inlining.  This sucks as functions calling
2768 	 always-inline functions will get less optimized, but at the
2769 	 same time inlining a function that calls an always-inline
2770 	 function into an always-inline function might introduce
2771 	 cycles of edges to be always inlined in the callgraph.
2772 
2773 	 We might want to be smarter and just avoid this type of inlining.  */
2774       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2775 	  && lookup_attribute ("always_inline",
2776 			       DECL_ATTRIBUTES (node->decl))))
2777     ;
2778   else if (lookup_attribute ("flatten",
2779 			     DECL_ATTRIBUTES (node->decl)) != NULL)
2780     {
2781       /* When the function is marked to be flattened, recursively inline
2782 	 all calls in it.  */
2783       if (dump_enabled_p ())
2784 	dump_printf (MSG_OPTIMIZED_LOCATIONS,
2785 		     "Flattening %C\n", node);
2786       flatten_function (node, true, true);
2787       inlined = true;
2788     }
2789   else
2790     {
2791       /* If some always_inline functions were inlined, apply the changes.
2792 	 This way we will not account always-inlines into growth limits, and
2793 	 moreover we will inline calls from always-inlines that we skipped
2794 	 previously because of the conditional above.  */
2795       if (inlined)
2796 	{
2797 	  timevar_push (TV_INTEGRATION);
2798 	  todo |= optimize_inline_calls (current_function_decl);
2799 	  /* optimize_inline_calls call above might have introduced new
2800 	     statements that don't have inline parameters computed.  */
2801 	  for (edge = node->callees; edge; edge = edge->next_callee)
2802 	    {
2803 	      /* We can encounter a not-yet-analyzed function during
2804 		 early inlining on callgraphs with strongly
2805 		 connected components.  */
2806 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2807 	      es->call_stmt_size
2808 		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
2809 	      es->call_stmt_time
2810 		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
2811 	    }
2812 	  ipa_update_overall_fn_summary (node);
2813 	  inlined = false;
2814 	  timevar_pop (TV_INTEGRATION);
2815 	}
2816       /* We iterate incremental inlining to get trivial cases of indirect
2817 	 inlining.  */
2818       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2819 	     && early_inline_small_functions (node))
2820 	{
2821 	  timevar_push (TV_INTEGRATION);
2822 	  todo |= optimize_inline_calls (current_function_decl);
2823 
2824 	  /* Technically we ought to recompute inline parameters so the new
2825 	     iteration of the early inliner works as expected.  We however have
2826 	     the values approximately right and thus we only need to update the
2827 	     edge info that might be cleared out for newly discovered edges.  */
2828 	  for (edge = node->callees; edge; edge = edge->next_callee)
2829 	    {
2830 	      /* We have no summary for new bound store calls yet.  */
2831 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2832 	      es->call_stmt_size
2833 		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
2834 	      es->call_stmt_time
2835 		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
2836 
2837 	      if (edge->callee->decl
2838 		  && !gimple_check_call_matching_types (
2839 		      edge->call_stmt, edge->callee->decl, false))
2840 		{
2841  		  edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2842 		  edge->call_stmt_cannot_inline_p = true;
2843 		}
2844 	    }
2845 	  if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2846 	    ipa_update_overall_fn_summary (node);
2847 	  timevar_pop (TV_INTEGRATION);
2848 	  iterations++;
2849 	  inlined = false;
2850 	}
2851       if (dump_file)
2852 	fprintf (dump_file, "Iterations: %i\n", iterations);
2853     }
2854 
2855   if (inlined)
2856     {
2857       timevar_push (TV_INTEGRATION);
2858       todo |= optimize_inline_calls (current_function_decl);
2859       timevar_pop (TV_INTEGRATION);
2860     }
2861 
2862   fun->always_inline_functions_inlined = true;
2863 
2864   return todo;
2865 }
2866 
2867 /* Do inlining of small functions.  Doing so early helps profiling and other
2868    passes to be somewhat more effective and avoids some code duplication in
2869    later real inlining pass for testcases with very many function calls.  */
2870 
2871 namespace {
2872 
2873 const pass_data pass_data_early_inline =
2874 {
2875   GIMPLE_PASS, /* type */
2876   "einline", /* name */
2877   OPTGROUP_INLINE, /* optinfo_flags */
2878   TV_EARLY_INLINING, /* tv_id */
2879   PROP_ssa, /* properties_required */
2880   0, /* properties_provided */
2881   0, /* properties_destroyed */
2882   0, /* todo_flags_start */
2883   0, /* todo_flags_finish */
2884 };
2885 
2886 class pass_early_inline : public gimple_opt_pass
2887 {
2888 public:
2889   pass_early_inline (gcc::context *ctxt)
2890     : gimple_opt_pass (pass_data_early_inline, ctxt)
2891   {}
2892 
2893   /* opt_pass methods: */
2894   virtual unsigned int execute (function *);
2895 
2896 }; // class pass_early_inline
2897 
2898 unsigned int
2899 pass_early_inline::execute (function *fun)
2900 {
2901   return early_inliner (fun);
2902 }
2903 
2904 } // anon namespace
2905 
2906 gimple_opt_pass *
2907 make_pass_early_inline (gcc::context *ctxt)
2908 {
2909   return new pass_early_inline (ctxt);
2910 }
2911 
2912 namespace {
2913 
2914 const pass_data pass_data_ipa_inline =
2915 {
2916   IPA_PASS, /* type */
2917   "inline", /* name */
2918   OPTGROUP_INLINE, /* optinfo_flags */
2919   TV_IPA_INLINING, /* tv_id */
2920   0, /* properties_required */
2921   0, /* properties_provided */
2922   0, /* properties_destroyed */
2923   0, /* todo_flags_start */
2924   ( TODO_dump_symtab ), /* todo_flags_finish */
2925 };
2926 
2927 class pass_ipa_inline : public ipa_opt_pass_d
2928 {
2929 public:
2930   pass_ipa_inline (gcc::context *ctxt)
2931     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2932 		      NULL, /* generate_summary */
2933 		      NULL, /* write_summary */
2934 		      NULL, /* read_summary */
2935 		      NULL, /* write_optimization_summary */
2936 		      NULL, /* read_optimization_summary */
2937 		      NULL, /* stmt_fixup */
2938 		      0, /* function_transform_todo_flags_start */
2939 		      inline_transform, /* function_transform */
2940 		      NULL) /* variable_transform */
2941   {}
2942 
2943   /* opt_pass methods: */
2944   virtual unsigned int execute (function *) { return ipa_inline (); }
2945 
2946 }; // class pass_ipa_inline
2947 
2948 } // anon namespace
2949 
2950 ipa_opt_pass_d *
2951 make_pass_ipa_inline (gcc::context *ctxt)
2952 {
2953   return new pass_ipa_inline (ctxt);
2954 }
2955