1 /* Inlining decision heuristics.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3    Contributed by Jan Hubicka
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /*  Inlining decision heuristics
22 
23     The implementation of the inliner is organized as follows:
24 
25     inlining heuristics limits
26 
27       can_inline_edge_p allows checking that a particular inlining is allowed
28       by the limits specified by the user (allowed function growth, stack
29       growth and so on).
30 
31       Functions are inlined when it is obvious the result is profitable (such
32       as functions called once or when inlining reduces code size).
33       In addition to that we perform inlining of small functions and recursive
34       inlining.
35 
36     inlining heuristics
37 
38        The inliner itself is split into two passes:
39 
40        pass_early_inlining
41 
42 	 A simple local inlining pass that inlines callees into the current
43 	 function.  This pass makes no use of whole unit analysis and thus can
44 	 make only very simple decisions based on local properties.
45 
46 	 The strength of the pass is that it is run in topological order
47 	 (reverse postorder) on the callgraph. Functions are converted into SSA
48 	 form just before this pass and optimized subsequently. As a result, the
49 	 callees of the function seen by the early inliner have already been
50 	 optimized, and the results of early inlining add a lot of optimization
51 	 opportunities for the subsequent local optimization.
52 
53 	 The pass handles the obvious inlining decisions within the compilation
54 	 unit - inlining auto inline functions, inlining for size and
55 	 flattening.
56 
57 	 The main strength of the pass is its ability to eliminate the
58 	 abstraction penalty in C++ code (via a combination of inlining and
59 	 early optimization) and thus improve the quality of analysis done by
60 	 the real IPA optimizers.
61 
62 	 Because it lacks whole unit knowledge, the pass cannot really make
63 	 good code size/performance tradeoffs.  It does, however, perform very
64 	 simple speculative inlining, allowing code size to grow by
65 	 EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
66 	 the optimizations performed later are very likely to eliminate the cost.
67 
68        pass_ipa_inline
69 
70 	 This is the real inliner, able to handle inlining with whole program
71 	 knowledge. It performs the following steps:
72 
73 	 1) inlining of small functions.  This is implemented by a greedy
74 	 algorithm ordering all inlinable cgraph edges by their badness and
75 	 inlining them in this order as long as the inline limits allow doing so.
76 
77 	 This heuristic is not very good at inlining recursive calls.  Recursive
78 	 calls can be inlined with results similar to loop unrolling.  To do so,
79 	 a special purpose recursive inliner is executed on the function when a
80 	 recursive edge is met as a viable candidate.
81 
82 	 2) Unreachable functions are removed from the callgraph.  Inlining leads
83 	 to devirtualization and other modifications of the callgraph, so functions
84 	 may become unreachable during the process.  Also functions declared as
85 	 extern inline or virtual functions are removed, since after inlining
86 	 we no longer need the offline bodies.
87 
88 	 3) Functions called once and not exported from the unit are inlined.
89 	 This should almost always lead to a reduction of code size by
90 	 eliminating the need for an offline copy of the function.  */
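/* As an illustration only, the greedy step 1) above roughly amounts to:

     put every inlinable edge into a heap keyed by edge_badness ();
     while (the heap is not empty)
       {
	 take the edge with the smallest badness;
	 if (can_inline_edge_p () and the growth limits still allow it)
	   {
	     inline the call;
	     recompute the badness of edges affected by the inlining;
	   }
       }

   This is a simplified sketch, not the actual control flow; the real pass
   also re-checks stale heap keys, handles recursive edges specially and
   tracks overall unit growth against the inline-unit-growth limit.  */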
91 
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
122 
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
125 
126 /* Statistics we collect about inlining algorithm.  */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
130 
131 /* Return false when inlining edge E would lead to violating
132    limits on function unit growth or stack usage growth.
133 
134    The relative function body growth limit is present generally
135    to avoid problems with the non-linear behavior of the compiler.
136    To allow inlining huge functions into a tiny wrapper, the limit
137    is always based on the bigger of the two functions considered.
138 
139    For stack growth limits we always base the growth on the stack usage
140    of the callers.  We want to prevent applications from segfaulting
141    on stack overflow when functions with huge stack frames get
142    inlined.  */
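/* A worked example with made-up numbers: if the largest self size seen
   (along the chain of functions E->CALLER is inlined into, or the callee
   itself) is 1000 and --param large-function-growth is 100, the limit below
   becomes 1000 + 1000 * 100 / 100 = 2000 instructions; an edge whose
   estimated size after inlining exceeds this limit (together with the other
   thresholds checked below) fails with CIF_LARGE_FUNCTION_GROWTH_LIMIT.  */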
143 
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
146 {
147   struct cgraph_node *to = e->caller;
148   struct cgraph_node *what = e->callee->ultimate_alias_target ();
149   int newsize;
150   int limit = 0;
151   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152   ipa_size_summary *outer_info = ipa_size_summaries->get (to);
153 
154   /* Look for the function e->caller is inlined into.  While doing
155      so, work out the largest function body on the way.  As
156      described above, we want to base our function growth
157      limits on that - not on the self size of the
158      outer function, nor on the self size of the inline code
159      we immediately inline into.  This is the most relaxed
160      interpretation of the rule "do not grow large functions
161      too much in order to prevent the compiler from exploding".  */
162   while (true)
163     {
164       ipa_size_summary *size_info = ipa_size_summaries->get (to);
165       if (limit < size_info->self_size)
166 	limit = size_info->self_size;
167       if (stack_size_limit < size_info->estimated_self_stack_size)
168 	stack_size_limit = size_info->estimated_self_stack_size;
169       if (to->inlined_to)
170         to = to->callers->caller;
171       else
172 	break;
173     }
174 
175   ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176   ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
177 
178   if (limit < what_size_info->self_size)
179     limit = what_size_info->self_size;
180 
181   limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
182 
183   /* Check the size after inlining against the function limits.  But allow
184      the function to shrink if it went over the limits by forced inlining.  */
185   newsize = estimate_size_after_inlining (to, e);
186   if (newsize >= ipa_size_summaries->get (what)->size
187       && newsize > opt_for_fn (to->decl, param_large_function_insns)
188       && newsize > limit)
189     {
190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191       return false;
192     }
193 
194   if (!what_info->estimated_stack_size)
195     return true;
196 
197   /* FIXME: The stack size limit often prevents inlining in Fortran programs
198      due to large I/O data structures used by the Fortran front-end.
199      We ought to ignore this limit when we know that the edge is executed
200      on every invocation of the caller (i.e. its call statement dominates
201      the exit block).  We do not track this information yet.  */
202   stack_size_limit += ((gcov_type)stack_size_limit
203 		       * opt_for_fn (to->decl, param_stack_frame_growth)
204 		       / 100);
205 
206   inlined_stack = (ipa_get_stack_frame_offset (to)
207 		   + outer_info->estimated_self_stack_size
208 		   + what_info->estimated_stack_size);
209   /* Check new stack consumption with stack consumption at the place
210      stack is used.  */
211   if (inlined_stack > stack_size_limit
212       /* If the function already has large stack usage from a sibling
213 	 inline call, we can inline, too.
214 	 This bit overoptimistically assumes that we are good at stack
215 	 packing.  */
216       && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217       && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
218     {
219       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220       return false;
221     }
222   return true;
223 }
224 
225 /* Dump info about why inlining has failed.  */
226 
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
229 {
230   if (dump_enabled_p ())
231     {
232       dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 		       "  not inlinable: %C -> %C, %s\n",
234 		       e->caller, e->callee,
235 		       cgraph_inline_failed_string (e->inline_failed));
236       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 	   || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 	  && e->caller->lto_file_data
239 	  && e->callee->ultimate_alias_target ()->lto_file_data)
240 	{
241 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 			   "  LTO objects: %s, %s\n",
243 			   e->caller->lto_file_data->file_name,
244 			   e->callee->ultimate_alias_target ()->lto_file_data->file_name);
245 	}
246       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 	if (dump_file)
248 	  cl_target_option_print_diff
249 	    (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 	     target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 	if (dump_file)
253 	  cl_optimization_print_diff
254 	    (dump_file, 2, opts_for_fn (e->caller->decl),
255 	     opts_for_fn (e->callee->ultimate_alias_target ()->decl));
256     }
257 }
258 
259  /* Decide whether sanitizer-related attributes allow inlining. */
260 
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
263 {
264   if (!caller || !callee)
265     return true;
266 
267   /* Follow clang and allow inlining for always_inline functions.  */
268   if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269     return true;
270 
271   const sanitize_code codes[] =
272     {
273       SANITIZE_ADDRESS,
274       SANITIZE_THREAD,
275       SANITIZE_UNDEFINED,
276       SANITIZE_UNDEFINED_NONDEFAULT,
277       SANITIZE_POINTER_COMPARE,
278       SANITIZE_POINTER_SUBTRACT
279     };
280 
281   for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282     if (sanitize_flags_p (codes[i], caller)
283 	!= sanitize_flags_p (codes[i], callee))
284       return false;
285 
286   return true;
287 }
288 
289 /* Used for flags where it is safe to inline when the caller's value is
290    greater than the callee's.  */
291 #define check_maybe_up(flag) \
292       (opts_for_fn (caller->decl)->x_##flag		\
293        != opts_for_fn (callee->decl)->x_##flag		\
294        && (!always_inline 				\
295 	   || opts_for_fn (caller->decl)->x_##flag	\
296 	      < opts_for_fn (callee->decl)->x_##flag))
297 /* Used for flags where it is safe to inline when the caller's value is
298    smaller than the callee's.  */
299 #define check_maybe_down(flag) \
300       (opts_for_fn (caller->decl)->x_##flag		\
301        != opts_for_fn (callee->decl)->x_##flag		\
302        && (!always_inline 				\
303 	   || opts_for_fn (caller->decl)->x_##flag	\
304 	      > opts_for_fn (callee->decl)->x_##flag))
305 /* Used for flags where exact match is needed for correctness.  */
306 #define check_match(flag) \
307       (opts_for_fn (caller->decl)->x_##flag		\
308        != opts_for_fn (callee->decl)->x_##flag)
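/* For example, check_maybe_down (optimize_debug), used below, expands roughly
   to:

     opts_for_fn (caller->decl)->x_optimize_debug
       != opts_for_fn (callee->decl)->x_optimize_debug
     && (!always_inline
	 || opts_for_fn (caller->decl)->x_optimize_debug
	    > opts_for_fn (callee->decl)->x_optimize_debug)

   i.e. for always_inline callees the mismatch is tolerated as long as the
   caller's value is not larger than the callee's.  */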
309 
310 /* Decide if we can inline the edge and possibly update
311    inline_failed reason.
312    We check whether inlining is possible at all and whether
313    caller growth limits allow doing so.
314 
315    If REPORT is true, output the reason to the dump file.  */
316 
317 static bool
318 can_inline_edge_p (struct cgraph_edge *e, bool report,
319 		   bool early = false)
320 {
321   gcc_checking_assert (e->inline_failed);
322 
323   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
324     {
325       if (report)
326         report_inline_failed_reason (e);
327       return false;
328     }
329 
330   bool inlinable = true;
331   enum availability avail;
332   cgraph_node *caller = (e->caller->inlined_to
333 			 ? e->caller->inlined_to : e->caller);
334   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
335 
336   if (!callee->definition)
337     {
338       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
339       inlinable = false;
340     }
341   if (!early && (!opt_for_fn (callee->decl, optimize)
342 		 || !opt_for_fn (caller->decl, optimize)))
343     {
344       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
345       inlinable = false;
346     }
347   else if (callee->calls_comdat_local)
348     {
349       e->inline_failed = CIF_USES_COMDAT_LOCAL;
350       inlinable = false;
351     }
352   else if (avail <= AVAIL_INTERPOSABLE)
353     {
354       e->inline_failed = CIF_OVERWRITABLE;
355       inlinable = false;
356     }
357   /* All edges with call_stmt_cannot_inline_p should have inline_failed
358      initialized to one of FINAL_ERROR reasons.  */
359   else if (e->call_stmt_cannot_inline_p)
360     gcc_unreachable ();
361   /* Don't inline if the functions have different EH personalities.  */
362   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
363 	   && DECL_FUNCTION_PERSONALITY (callee->decl)
364 	   && (DECL_FUNCTION_PERSONALITY (caller->decl)
365 	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
366     {
367       e->inline_failed = CIF_EH_PERSONALITY;
368       inlinable = false;
369     }
370   /* TM pure functions should not be inlined into non-TM_pure
371      functions.  */
372   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
373     {
374       e->inline_failed = CIF_UNSPECIFIED;
375       inlinable = false;
376     }
377   /* Check compatibility of target optimization options.  */
378   else if (!targetm.target_option.can_inline_p (caller->decl,
379 						callee->decl))
380     {
381       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
382       inlinable = false;
383     }
384   else if (ipa_fn_summaries->get (callee) == NULL
385 	   || !ipa_fn_summaries->get (callee)->inlinable)
386     {
387       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
388       inlinable = false;
389     }
390   /* Don't inline a function with mismatched sanitization attributes. */
391   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
392     {
393       e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
394       inlinable = false;
395     }
396   if (!inlinable && report)
397     report_inline_failed_reason (e);
398   return inlinable;
399 }
400 
401 /* Return the inline_insns_single limit for function N.  If HINT or HINT2 is
402    true, scale up the bound.  */
403 
404 static int
405 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
406 {
407   if (hint && hint2)
408     {
409       int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
410       spd = spd * spd;
411       if (spd > 1000000)
412 	spd = 1000000;
413       return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
414     }
415   if (hint || hint2)
416     return opt_for_fn (n->decl, param_max_inline_insns_single)
417 	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
418   return opt_for_fn (n->decl, param_max_inline_insns_single);
419 }
420 
421 /* Return the inline_insns_auto limit for function N.  If HINT or HINT2 is
422    true, scale up the bound.  */
423 
424 static int
425 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
426 {
427   int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
428   if (hint && hint2)
429     {
430       int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
431       spd = spd * spd;
432       if (spd > 1000000)
433 	spd = 1000000;
434       return max_inline_insns_auto * spd / 100;
435     }
436   if (hint || hint2)
437     return max_inline_insns_auto
438 	   * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
439   return max_inline_insns_auto;
440 }
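/* For instance, with the made-up values --param max-inline-insns-auto=30 and
   --param inline-heuristics-hint-percent=200, a single matching hint raises
   the bound used below to 30 * 200 / 100 = 60 instructions, and matching
   hints from both groups scale it up further as computed above.  */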
441 
442 /* Decide if we can inline the edge and possibly update
443    inline_failed reason.
444    We check whether inlining is possible at all and whether
445    caller growth limits allow doing so.
446 
447    If REPORT is true, output the reason to the dump file.
448 
449    If DISREGARD_LIMITS is true, ignore size limits.  */
450 
451 static bool
452 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
453 		             bool disregard_limits = false, bool early = false)
454 {
455   gcc_checking_assert (e->inline_failed);
456 
457   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
458     {
459       if (report)
460         report_inline_failed_reason (e);
461       return false;
462     }
463 
464   bool inlinable = true;
465   enum availability avail;
466   cgraph_node *caller = (e->caller->inlined_to
467 			 ? e->caller->inlined_to : e->caller);
468   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
469   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
470   tree callee_tree
471     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
472   /* Check if caller growth allows the inlining.  */
473   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
474       && !disregard_limits
475       && !lookup_attribute ("flatten",
476      		 DECL_ATTRIBUTES (caller->decl))
477       && !caller_growth_limits (e))
478     inlinable = false;
479   else if (callee->externally_visible
480 	   && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
481 	   && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
482     {
483       e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
484       inlinable = false;
485     }
486   /* Don't inline a function with a higher optimization level than the
487      caller.  FIXME: this is really just the tip of the iceberg of handling
488      the optimization attribute.  */
489   else if (caller_tree != callee_tree)
490     {
491       bool always_inline =
492 	     (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
493 	      && lookup_attribute ("always_inline",
494 				   DECL_ATTRIBUTES (callee->decl)));
495       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
496       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
497 
498      /* Until GCC 4.9 we did not check the semantics-altering flags
499 	below and inlined across optimization boundaries.
500 	Enabling checks below breaks several packages by refusing
501 	to inline library always_inline functions. See PR65873.
502 	Disable the check for early inlining for now until a better solution
503 	is found.  */
504      if (always_inline && early)
505 	;
506       /* There are some options that change IL semantics, which means
507          we cannot inline in these cases for correctness reasons -
508 	 not even for functions declared always_inline.  */
509      else if (check_match (flag_wrapv)
510 	      || check_match (flag_trapv)
511 	      || check_match (flag_pcc_struct_return)
512 	      || check_maybe_down (optimize_debug)
513 	      /* When the caller or callee does FP math, be sure the FP codegen
514 		 flags are compatible.  */
515 	      || ((caller_info->fp_expressions && callee_info->fp_expressions)
516 		  && (check_maybe_up (flag_rounding_math)
517 		      || check_maybe_up (flag_trapping_math)
518 		      || check_maybe_down (flag_unsafe_math_optimizations)
519 		      || check_maybe_down (flag_finite_math_only)
520 		      || check_maybe_up (flag_signaling_nans)
521 		      || check_maybe_down (flag_cx_limited_range)
522 		      || check_maybe_up (flag_signed_zeros)
523 		      || check_maybe_down (flag_associative_math)
524 		      || check_maybe_down (flag_reciprocal_math)
525 		      || check_maybe_down (flag_fp_int_builtin_inexact)
526 		      /* Strictly speaking only when the callee contains function
527 			 calls that may end up setting errno.  */
528 		      || check_maybe_up (flag_errno_math)))
529 	      /* We do not want code compiled with exceptions to be brought
530 		 into a non-EH function unless we know that the callee
531 		 does not throw.
532 		 This is tracked by DECL_FUNCTION_PERSONALITY.  */
533 	      || (check_maybe_up (flag_non_call_exceptions)
534 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
535 	      || (check_maybe_up (flag_exceptions)
536 		  && DECL_FUNCTION_PERSONALITY (callee->decl))
537 	      /* When devirtualization is disabled for callee, it is not safe
538 		 to inline it as we possibly mangled the type info.
539 		 Allow early inlining of always inlines.  */
540 	      || (!early && check_maybe_down (flag_devirtualize)))
541 	{
542 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
543 	  inlinable = false;
544 	}
545       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
546       else if (always_inline)
547 	;
548       /* When the user added an attribute to the callee, honor it.  */
549       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
550 	       && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
551 	{
552 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
553 	  inlinable = false;
554 	}
555       /* If an explicit optimize attribute is not used, the mismatch is caused
556 	 by different command line options used to build different units.
557 	 Do not care about COMDAT functions - those are intended to be
558          optimized with the optimization flags of the module they are used in.
559 	 Also do not care about mixing up size/speed optimization when
560 	 DECL_DISREGARD_INLINE_LIMITS is set.  */
561       else if ((callee->merged_comdat
562 	        && !lookup_attribute ("optimize",
563 				      DECL_ATTRIBUTES (caller->decl)))
564 	       || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
565 	;
566       /* If the mismatch is caused by merging two LTO units with different
567 	 optimization flags, we want to be a bit nicer.  However, never inline
568 	 if one of the functions is not optimized at all.  */
569       else if (!opt_for_fn (callee->decl, optimize)
570       	       || !opt_for_fn (caller->decl, optimize))
571 	{
572 	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 	  inlinable = false;
574 	}
575       /* If the callee is optimized for size and the caller is not, allow
576 	 inlining if code shrinks or we are within the param_max_inline_insns_single
577 	 limit and the callee is declared inline (and thus likely a unified comdat).
578 	 This will allow the caller to run faster.  */
579       else if (opt_for_fn (callee->decl, optimize_size)
580 	       > opt_for_fn (caller->decl, optimize_size))
581 	{
582 	  int growth = estimate_edge_growth (e);
583 	  if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
584 	      && (!DECL_DECLARED_INLINE_P (callee->decl)
585 		  && growth >= MAX (inline_insns_single (caller, false, false),
586 				    inline_insns_auto (caller, false, false))))
587 	    {
588 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
589 	      inlinable = false;
590 	    }
591 	}
592       /* If the callee is more aggressively optimized for performance than the
593 	 caller, we generally want to inline only cheap (runtime-wise) functions.  */
594       else if (opt_for_fn (callee->decl, optimize_size)
595 	       < opt_for_fn (caller->decl, optimize_size)
596 	       || (opt_for_fn (callee->decl, optimize)
597 		   > opt_for_fn (caller->decl, optimize)))
598 	{
599 	  if (estimate_edge_time (e)
600 	      >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
601 	    {
602 	      e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
603 	      inlinable = false;
604 	    }
605 	}
606 
607     }
608 
609   if (!inlinable && report)
610     report_inline_failed_reason (e);
611   return inlinable;
612 }
613 
614 
615 /* Return true if the edge E is inlinable during early inlining.  */
616 
617 static bool
618 can_early_inline_edge_p (struct cgraph_edge *e)
619 {
620   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
621   /* The early inliner might get called at WPA stage when an IPA pass adds a
622      new function.  In this case we cannot really do any early inlining
623      because function bodies are missing.  */
624   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
625     return false;
626   if (!gimple_has_body_p (callee->decl))
627     {
628       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
629       return false;
630     }
631   /* In the early inliner some of the callees may not be in SSA form yet
632      (i.e. the callgraph is cyclic and we have not processed
633      the callee with the early inliner yet).  We don't have a CIF code for this
634      case; later we will re-do the decision in the real inliner.  */
635   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
636       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
637     {
638       if (dump_enabled_p ())
639 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
640 			 "  edge not inlinable: not in SSA form\n");
641       return false;
642     }
643   if (!can_inline_edge_p (e, true, true)
644       || !can_inline_edge_by_limits_p (e, true, false, true))
645     return false;
646   return true;
647 }
648 
649 
650 /* Return number of calls in N.  Ignore cheap builtins.  */
651 
652 static int
653 num_calls (struct cgraph_node *n)
654 {
655   struct cgraph_edge *e;
656   int num = 0;
657 
658   for (e = n->callees; e; e = e->next_callee)
659     if (!is_inexpensive_builtin (e->callee->decl))
660       num++;
661   return num;
662 }
663 
664 
665 /* Return true if we are interested in early inlining the small function.  */
666 
667 static bool
668 want_early_inline_function_p (struct cgraph_edge *e)
669 {
670   bool want_inline = true;
671   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
672 
673   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
674     ;
675   /* For AutoFDO, we need to make sure that before the profile summary, all
676      hot paths' IR looks exactly the same as in the profiled binary.  As a
677      result, in the early inliner we disregard the size limit and inline those
678      callsites that are:
679        * inlined in the profiled binary, and
680        * whose cloned callee has enough samples to be considered "hot".  */
681   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
682     ;
683   else if (!DECL_DECLARED_INLINE_P (callee->decl)
684 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
685     {
686       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
687       report_inline_failed_reason (e);
688       want_inline = false;
689     }
690   else
691     {
692       /* First take care of very large functions.  */
693       int min_growth = estimate_min_edge_growth (e), growth = 0;
694       int n;
695       int early_inlining_insns = param_early_inlining_insns;
696 
697       if (min_growth > early_inlining_insns)
698 	{
699 	  if (dump_enabled_p ())
700 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
701 			     "  will not early inline: %C->%C, "
702 			     "call is cold and code would grow "
703 			     "at least by %i\n",
704 			     e->caller, callee,
705 			     min_growth);
706 	  want_inline = false;
707 	}
708       else
709         growth = estimate_edge_growth (e);
710 
711 
712       if (!want_inline || growth <= param_max_inline_insns_size)
713 	;
714       else if (!e->maybe_hot_p ())
715 	{
716 	  if (dump_enabled_p ())
717 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
718 			     "  will not early inline: %C->%C, "
719 			     "call is cold and code would grow by %i\n",
720 			     e->caller, callee,
721 			     growth);
722 	  want_inline = false;
723 	}
724       else if (growth > early_inlining_insns)
725 	{
726 	  if (dump_enabled_p ())
727 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
728 			     "  will not early inline: %C->%C, "
729 			     "growth %i exceeds --param early-inlining-insns\n",
730 			     e->caller, callee, growth);
731 	  want_inline = false;
732 	}
733       else if ((n = num_calls (callee)) != 0
734 	       && growth * (n + 1) > early_inlining_insns)
735 	{
736 	  if (dump_enabled_p ())
737 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 			     "  will not early inline: %C->%C, "
739 			     "growth %i exceeds --param early-inlining-insns "
740 			     "divided by number of calls\n",
741 			     e->caller, callee, growth);
742 	  want_inline = false;
743 	}
744     }
745   return want_inline;
746 }
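/* For example, the growth * (n + 1) test above means that with the
   illustrative value --param early-inlining-insns=6, a hot callee growing
   the caller by 3 instructions and containing one non-trivial call is
   accepted (3 * 2 = 6), while the same callee containing two such calls is
   rejected (3 * 3 = 9 > 6).  */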
747 
748 /* Compute time of the edge->caller + edge->callee execution when inlining
749    does not happen.  */
750 
751 inline sreal
752 compute_uninlined_call_time (struct cgraph_edge *edge,
753 			     sreal uninlined_call_time,
754 			     sreal freq)
755 {
756   cgraph_node *caller = (edge->caller->inlined_to
757 			 ? edge->caller->inlined_to
758 			 : edge->caller);
759 
760   if (freq > 0)
761     uninlined_call_time *= freq;
762   else
763     uninlined_call_time = uninlined_call_time >> 11;
764 
765   sreal caller_time = ipa_fn_summaries->get (caller)->time;
766   return uninlined_call_time + caller_time;
767 }
768 
769 /* Same as compute_uninlined_call_time but compute the time when inlining
770    does happen.  */
771 
772 inline sreal
773 compute_inlined_call_time (struct cgraph_edge *edge,
774 			   sreal time,
775 			   sreal freq)
776 {
777   cgraph_node *caller = (edge->caller->inlined_to
778 			 ? edge->caller->inlined_to
779 			 : edge->caller);
780   sreal caller_time = ipa_fn_summaries->get (caller)->time;
781 
782   if (freq > 0)
783     time *= freq;
784   else
785     time = time >> 11;
786 
787   /* This calculation should match one in ipa-inline-analysis.c
788      (estimate_edge_size_and_time).  */
789   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
790   time += caller_time;
791   if (time <= 0)
792     time = ((sreal) 1) >> 8;
793   gcc_checking_assert (time >= 0);
794   return time;
795 }
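/* Put differently, with CALLER_TIME the time of the caller body and FREQ the
   edge frequency, the two estimates above are roughly

     uninlined = callee_time * freq + caller_time
     inlined   = (callee_time_in_context - call_stmt_time) * freq + caller_time

   where the frequency scaling is replaced by a >> 11 (divide by 2048) factor
   when the edge is never executed.  This is only a restatement of the code
   above for readability.  */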
796 
797 /* Determine the time saved by inlining EDGE of frequency FREQ
798    where the callee's runtime without inlining is UNINLINED_TIME
799    and with inlining is INLINED_TIME.  */
800 
801 inline sreal
802 inlining_speedup (struct cgraph_edge *edge,
803     		  sreal freq,
804 		  sreal uninlined_time,
805 		  sreal inlined_time)
806 {
807   sreal speedup = uninlined_time - inlined_time;
808   /* Handling of call_time should match one in ipa-inline-fnsummary.c
809      (estimate_edge_size_and_time).  */
810   sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
811 
812   if (freq > 0)
813     {
814       speedup = (speedup + call_time);
815       if (freq != 1)
816        speedup = speedup * freq;
817     }
818   else if (freq == 0)
819     speedup = speedup >> 11;
820   gcc_checking_assert (speedup >= 0);
821   return speedup;
822 }
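/* E.g. if the callee is estimated to take 50 time units standalone but 30
   units when specialized for this call site, and the call statement itself
   costs 2 units, a call executed with frequency 1 saves roughly
   (50 - 30) + 2 = 22 units.  The numbers are illustrative only.  */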
823 
824 /* Return true if the speedup for inlining E is bigger than
825    param_inline_min_speedup.  */
826 
827 static bool
828 big_speedup_p (struct cgraph_edge *e)
829 {
830   sreal unspec_time;
831   sreal spec_time = estimate_edge_time (e, &unspec_time);
832   sreal freq = e->sreal_frequency ();
833   sreal time = compute_uninlined_call_time (e, unspec_time, freq);
834   sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
835   cgraph_node *caller = (e->caller->inlined_to
836 			 ? e->caller->inlined_to
837 			 : e->caller);
838   int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
839 
840   if ((time - inlined_time) * 100 > time * limit)
841     return true;
842   return false;
843 }
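/* The test above is equivalent to requiring that inlining saves more than
   param_inline_min_speedup percent of the combined caller + callee runtime,
   i.e. (time - inlined_time) / time > limit / 100.  */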
844 
845 /* Return true if we are interested in inlining the small function.
846    When REPORT is true, report the reason to the dump file.  */
847 
848 static bool
849 want_inline_small_function_p (struct cgraph_edge *e, bool report)
850 {
851   bool want_inline = true;
852   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
853   cgraph_node *to  = (e->caller->inlined_to
854 		      ? e->caller->inlined_to : e->caller);
855 
856   /* Allow this function to be called before can_inline_edge_p,
857      since it's usually cheaper.  */
858   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
859     want_inline = false;
860   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
861     ;
862   else if (!DECL_DECLARED_INLINE_P (callee->decl)
863 	   && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
864     {
865       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
866       want_inline = false;
867     }
868   /* Do a fast and conservative check whether the function can be a good
869      inline candidate.  */
870   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
871 	   && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
872 	   && ipa_fn_summaries->get (callee)->min_size
873 		- ipa_call_summaries->get (e)->call_stmt_size
874 	      > inline_insns_auto (e->caller, true, true))
875     {
876       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
877       want_inline = false;
878     }
879   else if ((DECL_DECLARED_INLINE_P (callee->decl)
880 	    || e->count.ipa ().nonzero_p ())
881 	   && ipa_fn_summaries->get (callee)->min_size
882 		- ipa_call_summaries->get (e)->call_stmt_size
883 	      > inline_insns_single (e->caller, true, true))
884     {
885       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
886 			  ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
887 			  : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
888       want_inline = false;
889     }
890   else
891     {
892       int growth = estimate_edge_growth (e);
893       ipa_hints hints = estimate_edge_hints (e);
894       /* We have two independent groups of hints.  If one matches in each
895 	 of the groups, the limits are increased.  If both groups match, the
896 	 limit is increased even more.  */
897       bool apply_hints = (hints & (INLINE_HINT_indirect_call
898 				   | INLINE_HINT_known_hot
899 				   | INLINE_HINT_loop_iterations
900 				   | INLINE_HINT_loop_stride));
901       bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
902 
903       if (growth <= opt_for_fn (to->decl,
904 				param_max_inline_insns_size))
905 	;
906       /* Apply the param_max_inline_insns_single limit.  Do not do so when
907 	 hints suggest that inlining the given function is very profitable.
908 	 Avoid computing big_speedup_p when it is not necessary to change the
909 	 outcome of the decision.  */
910       else if (DECL_DECLARED_INLINE_P (callee->decl)
911 	       && growth >= inline_insns_single (e->caller, apply_hints,
912 						 apply_hints2)
913 	       && (apply_hints || apply_hints2
914 		   || growth >= inline_insns_single (e->caller, true,
915 						     apply_hints2)
916 		   || !big_speedup_p (e)))
917 	{
918           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
919 	  want_inline = false;
920 	}
921       else if (!DECL_DECLARED_INLINE_P (callee->decl)
922 	       && !opt_for_fn (e->caller->decl, flag_inline_functions)
923 	       && growth >= opt_for_fn (to->decl,
924 					param_max_inline_insns_small))
925 	{
926 	  /* growth_positive_p is expensive, always test it last.  */
927 	  if (growth >= inline_insns_single (e->caller, false, false)
928 	      || growth_positive_p (callee, e, growth))
929 	    {
930               e->inline_failed = CIF_NOT_DECLARED_INLINED;
931 	      want_inline = false;
932  	    }
933 	}
934       /* Apply param_max_inline_insns_auto limit for functions not declared
935 	 inline.  Bypass the limit when speedup seems big.  */
936       else if (!DECL_DECLARED_INLINE_P (callee->decl)
937 	       && growth >= inline_insns_auto (e->caller, apply_hints,
938 					       apply_hints2)
939 	       && (apply_hints || apply_hints2
940 		   || growth >= inline_insns_auto (e->caller, true,
941 						   apply_hints2)
942 		   || !big_speedup_p (e)))
943 	{
944 	  /* growth_positive_p is expensive, always test it last.  */
945 	  if (growth >= inline_insns_single (e->caller, false, false)
946 	      || growth_positive_p (callee, e, growth))
947 	    {
948 	      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
949 	      want_inline = false;
950  	    }
951 	}
952       /* If call is cold, do not inline when function body would grow. */
953       else if (!e->maybe_hot_p ()
954 	       && (growth >= inline_insns_single (e->caller, false, false)
955 		   || growth_positive_p (callee, e, growth)))
956 	{
957           e->inline_failed = CIF_UNLIKELY_CALL;
958 	  want_inline = false;
959 	}
960     }
961   if (!want_inline && report)
962     report_inline_failed_reason (e);
963   return want_inline;
964 }
965 
966 /* EDGE is a self-recursive edge.
967    We handle two cases - when function A is inlined into itself
968    or when function A is being inlined into another inlined copy of function
969    A within function B.
970 
971    In the first case OUTER_NODE points to the toplevel copy of A, while
972    in the second case OUTER_NODE points to the outermost copy of A in B.
973 
974    In both cases we want to be extra selective since
975    inlining the call will just cause new recursive calls to appear.  */
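/* As an illustration of the peeling cap used below: with max_depth == 8 the
   frequency of the recursive edge must stay below 1 - 1/8 = 0.875 of the
   caller frequency at depth 1, and the cap is squared for each additional
   level, so deeper copies are peeled only when recursion becomes increasingly
   unlikely.  */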
976 
977 static bool
978 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
979 				   struct cgraph_node *outer_node,
980 				   bool peeling,
981 				   int depth)
982 {
983   char const *reason = NULL;
984   bool want_inline = true;
985   sreal caller_freq = 1;
986   int max_depth = opt_for_fn (outer_node->decl,
987 			      param_max_inline_recursive_depth_auto);
988 
989   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
990     max_depth = opt_for_fn (outer_node->decl,
991 			    param_max_inline_recursive_depth);
992 
993   if (!edge->maybe_hot_p ())
994     {
995       reason = "recursive call is cold";
996       want_inline = false;
997     }
998   else if (depth > max_depth)
999     {
1000       reason = "--param max-inline-recursive-depth exceeded.";
1001       want_inline = false;
1002     }
1003   else if (outer_node->inlined_to
1004 	   && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1005     {
1006       reason = "caller frequency is 0";
1007       want_inline = false;
1008     }
1009 
1010   if (!want_inline)
1011     ;
1012   /* Inlining of a self recursive function into a copy of itself within another
1013      function is a transformation similar to loop peeling.
1014 
1015      Peeling is profitable if we can inline enough copies to make the probability
1016      of an actual call to the self recursive function very small.  Be sure that
1017      the probability of recursion is small.
1018 
1019      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1020      This way the expected number of recursions is at most max_depth.  */
1021   else if (peeling)
1022     {
1023       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1024       int i;
1025       for (i = 1; i < depth; i++)
1026 	max_prob = max_prob * max_prob;
1027       if (edge->sreal_frequency () >= max_prob * caller_freq)
1028 	{
1029 	  reason = "frequency of recursive call is too large";
1030 	  want_inline = false;
1031 	}
1032     }
1033   /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
1034      recursion depth is large.  We reduce the function call overhead and increase
1035      the chances that things fit in the hardware return predictor.
1036 
1037      Recursive inlining might however increase the cost of stack frame setup,
1038      actually slowing down functions whose recursion tree is wide rather than
1039      deep.
1040 
1041      Deciding reliably on when to do recursive inlining without profile feedback
1042      is tricky.  For now we disable recursive inlining when the probability of self
1043      recursion is low.
1044 
1045      Recursive inlining of a self recursive call within a loop also results in
1046      large loop depths that generally optimize badly.  We may want to throttle
1047      down inlining in those cases.  In particular this seems to happen in one
1048      of the libstdc++ rb tree methods.  */
1049   else
1050     {
1051       if (edge->sreal_frequency () * 100
1052           <= caller_freq
1053 	     * opt_for_fn (outer_node->decl,
1054 			   param_min_inline_recursive_probability))
1055 	{
1056 	  reason = "frequency of recursive call is too small";
1057 	  want_inline = false;
1058 	}
1059     }
1060   if (!want_inline && dump_enabled_p ())
1061     dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1062 		     "   not inlining recursively: %s\n", reason);
1063   return want_inline;
1064 }
1065 
1066 /* Return true when NODE has an uninlinable caller;
1067    set HAS_HOT_CALL if it has a hot call.
1068    Worker for cgraph_for_node_and_aliases.  */
1069 
1070 static bool
1071 check_callers (struct cgraph_node *node, void *has_hot_call)
1072 {
1073   struct cgraph_edge *e;
1074    for (e = node->callers; e; e = e->next_caller)
1075      {
1076        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1077 	   || !opt_for_fn (e->caller->decl, optimize))
1078 	 return true;
1079        if (!can_inline_edge_p (e, true))
1080          return true;
1081        if (e->recursive_p ())
1082 	 return true;
1083        if (!can_inline_edge_by_limits_p (e, true))
1084          return true;
1085        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1086 	 *(bool *)has_hot_call = true;
1087      }
1088   return false;
1089 }
1090 
1091 /* If NODE has a caller, return true.  */
1092 
1093 static bool
1094 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1095 {
1096   if (node->callers)
1097     return true;
1098   return false;
1099 }
1100 
1101 /* Decide if inlining NODE would reduce unit size by eliminating
1102    the offline copy of the function.
1103    When COLD is true, cold calls are considered, too.  */
1104 
1105 static bool
1106 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1107 {
1108   bool has_hot_call = false;
1109 
1110   /* Aliases get inlined along with the function they alias.  */
1111   if (node->alias)
1112     return false;
1113   /* Already inlined?  */
1114   if (node->inlined_to)
1115     return false;
1116   /* Does it have callers?  */
1117   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1118     return false;
1119   /* Inlining into all callers would increase size?  */
1120   if (growth_positive_p (node, NULL, INT_MIN) > 0)
1121     return false;
1122   /* All inlines must be possible.  */
1123   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1124 					 true))
1125     return false;
1126   if (!cold && !has_hot_call)
1127     return false;
1128   return true;
1129 }
1130 
1131 /* Return true if WHERE of SIZE is a possible candidate for wrapper heuristics
1132    in estimate_edge_badness.  */
1133 
1134 static bool
1135 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1136 {
1137   return size < (DECL_DECLARED_INLINE_P (where->decl)
1138 		 ? inline_insns_single (where, false, false)
1139 		 : inline_insns_auto (where, false, false));
1140 }
1141 
1142 /* A cost model driving the inlining heuristics in a way that the edges with
1143    the smallest badness are inlined first.  After each inlining is performed,
1144    the costs of all caller edges of the affected nodes are recomputed so the
1145    metrics may accurately depend on values such as the number of inlinable
1146    callers of the function or the function body size.  */
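/* Roughly (see the formula in the body below), an edge saving time T per
   invocation, executed with frequency F, in a caller of size S, with edge
   growth G and callee overall growth O gets goodness ~ T * F / (G * O * (S + G))
   and badness = -goodness, so the most profitable candidates have the most
   negative keys and are extracted from the heap first.  This is a
   simplification; profile counts, hints and the wrapper penalty further
   adjust the value.  */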
1147 
1148 static sreal
1149 edge_badness (struct cgraph_edge *edge, bool dump)
1150 {
1151   sreal badness;
1152   int growth;
1153   sreal edge_time, unspec_edge_time;
1154   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1155   class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1156   ipa_hints hints;
1157   cgraph_node *caller = (edge->caller->inlined_to
1158 			 ? edge->caller->inlined_to
1159 			 : edge->caller);
1160 
1161   growth = estimate_edge_growth (edge);
1162   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1163   hints = estimate_edge_hints (edge);
1164   gcc_checking_assert (edge_time >= 0);
1165   /* Check that inlined time is better, but tolerate some roundoff issues.
1166      FIXME: When callee profile drops to 0 we account calls more.  This
1167      should be fixed by never doing that.  */
1168   gcc_checking_assert ((edge_time * 100
1169 			- callee_info->time * 101).to_int () <= 0
1170 			|| callee->count.ipa ().initialized_p ());
1171   gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1172 
1173   if (dump)
1174     {
1175       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1176 	       edge->caller->dump_name (),
1177 	       edge->callee->dump_name ());
1178       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1179 	       growth,
1180 	       edge_time.to_double (),
1181 	       unspec_edge_time.to_double ());
1182       ipa_dump_hints (dump_file, hints);
1183       if (big_speedup_p (edge))
1184 	fprintf (dump_file, " big_speedup");
1185       fprintf (dump_file, "\n");
1186     }
1187 
1188   /* Always prefer inlining saving code size.  */
1189   if (growth <= 0)
1190     {
1191       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1192       if (dump)
1193 	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1194 		 growth);
1195     }
1196    /* Inlining into EXTERNAL functions is not going to change anything unless
1197       they are themselves inlined.  */
1198    else if (DECL_EXTERNAL (caller->decl))
1199     {
1200       if (dump)
1201 	fprintf (dump_file, "      max: function is external\n");
1202       return sreal::max ();
1203     }
1204   /* When the profile is available, compute badness as:
1205 
1206                  time_saved * caller_count
1207      goodness =  -------------------------------------------------
1208 	         growth_of_caller * overall_growth * combined_size
1209 
1210      badness = - goodness
1211 
1212      Again, use a negative value to make calls with profile appear hotter
1213      than calls without.
1214   */
1215   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1216 	   || caller->count.ipa ().nonzero_p ())
1217     {
1218       sreal numerator, denominator;
1219       int overall_growth;
1220       sreal freq = edge->sreal_frequency ();
1221 
1222       numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1223       if (numerator <= 0)
1224 	numerator = ((sreal) 1 >> 8);
1225       if (caller->count.ipa ().nonzero_p ())
1226 	numerator *= caller->count.ipa ().to_gcov_type ();
1227       else if (caller->count.ipa ().initialized_p ())
1228 	numerator = numerator >> 11;
1229       denominator = growth;
1230 
1231       overall_growth = callee_info->growth;
1232 
1233       /* Look for inliner wrappers of the form:
1234 
1235 	 inline_caller ()
1236 	   {
1237 	     do_fast_job...
1238 	     if (need_more_work)
1239 	       noninline_callee ();
1240 	   }
1241 	 Without penalizing this case, we usually inline noninline_callee
1242 	 into the inline_caller because overall_growth is small preventing
1243 	 further inlining of inline_caller.
1244 
1245 	 Penalize only callgraph edges to functions with small overall
1246 	 growth ...
1247 	*/
1248       if (growth > overall_growth
1249 	  /* ... and having only one caller which is not inlined ... */
1250 	  && callee_info->single_caller
1251 	  && !edge->caller->inlined_to
1252 	  /* ... and edges executed only conditionally ... */
1253 	  && freq < 1
1254 	  /* ... consider case where callee is not inline but caller is ... */
1255 	  && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1256 	       && DECL_DECLARED_INLINE_P (caller->decl))
1257 	      /* ... or when early optimizers decided to split and edge
1258 		 frequency still indicates splitting is a win ... */
1259 	      || (callee->split_part && !caller->split_part
1260 		  && freq * 100
1261 			 < opt_for_fn (caller->decl,
1262 				       param_partial_inlining_entry_probability)
1263 		  /* ... and do not overwrite user specified hints.   */
1264 		  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1265 		      || DECL_DECLARED_INLINE_P (caller->decl)))))
1266 	{
1267 	  ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1268 	  int caller_growth = caller_info->growth;
1269 
1270 	  /* Only apply the penalty when caller looks like inline candidate,
1271 	     and it is not called once.  */
1272 	  if (!caller_info->single_caller && overall_growth < caller_growth
1273 	      && caller_info->inlinable
1274 	      && wrapper_heuristics_may_apply
1275 	     	 (caller, ipa_size_summaries->get (caller)->size))
1276 	    {
1277 	      if (dump)
1278 		fprintf (dump_file,
1279 			 "     Wrapper penalty. Increasing growth %i to %i\n",
1280 			 overall_growth, caller_growth);
1281 	      overall_growth = caller_growth;
1282 	    }
1283 	}
1284       if (overall_growth > 0)
1285         {
1286 	  /* Strongly prefer functions with few callers that can be inlined
1287 	     fully.  Squaring the growth here leads to smaller binaries on average.
1288 	     Watch however for extreme cases and return to a linear function
1289 	     when growth is large.  */
1290 	  if (overall_growth < 256)
1291 	    overall_growth *= overall_growth;
1292 	  else
1293 	    overall_growth += 256 * 256 - 256;
1294 	  denominator *= overall_growth;
1295         }
1296       denominator *= ipa_size_summaries->get (caller)->size + growth;
1297 
1298       badness = - numerator / denominator;
1299 
1300       if (dump)
1301 	{
1302 	  fprintf (dump_file,
1303 		   "      %f: guessed profile. frequency %f, count %" PRId64
1304 		   " caller count %" PRId64
1305 		   " time saved %f"
1306 		   " overall growth %i (current) %i (original)"
1307 		   " %i (compensated)\n",
1308 		   badness.to_double (),
1309 		   freq.to_double (),
1310 		   edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1311 		   caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1312 		   inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1313 		   estimate_growth (callee),
1314 		   callee_info->growth, overall_growth);
1315 	}
1316     }
1317   /* When the function local profile is not available or it does not give
1318      useful information (i.e. the frequency is zero), base the cost on the
1319      loop nest and overall size growth, so we optimize for the overall number
1320      of functions fully inlined in the program.  */
1321   else
1322     {
1323       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1324       badness = growth;
1325 
1326       /* Decrease badness if call is nested.  */
1327       if (badness > 0)
1328 	badness = badness >> nest;
1329       else
1330 	badness = badness << nest;
1331       if (dump)
1332 	fprintf (dump_file, "      %f: no profile. nest %i\n",
1333 		 badness.to_double (), nest);
1334     }
1335   gcc_checking_assert (badness != 0);
1336 
1337   if (edge->recursive_p ())
1338     badness = badness.shift (badness > 0 ? 4 : -4);
1339   if ((hints & (INLINE_HINT_indirect_call
1340 		| INLINE_HINT_loop_iterations
1341 		| INLINE_HINT_loop_stride))
1342       || callee_info->growth <= 0)
1343     badness = badness.shift (badness > 0 ? -2 : 2);
1344   if (hints & INLINE_HINT_builtin_constant_p)
1345     badness = badness.shift (badness > 0 ? -4 : 4);
1346   if (hints & (INLINE_HINT_same_scc))
1347     badness = badness.shift (badness > 0 ? 3 : -3);
1348   else if (hints & (INLINE_HINT_in_scc))
1349     badness = badness.shift (badness > 0 ? 2 : -2);
1350   else if (hints & (INLINE_HINT_cross_module))
1351     badness = badness.shift (badness > 0 ? 1 : -1);
1352   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1353     badness = badness.shift (badness > 0 ? -4 : 4);
1354   else if ((hints & INLINE_HINT_declared_inline))
1355     badness = badness.shift (badness > 0 ? -3 : 3);
1356   if (dump)
1357     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1358   return badness;
1359 }
1360 
1361 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1362 static inline void
1363 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1364 {
1365   sreal badness = edge_badness (edge, false);
1366   if (edge->aux)
1367     {
1368       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1369       gcc_checking_assert (n->get_data () == edge);
1370 
1371       /* fibonacci_heap::replace_key does a busy update of the
1372 	 heap that is unnecessarily expensive.
1373 	 We do lazy increases: after extracting the minimum, if the key
1374 	 turns out to be out of date, it is re-inserted into the heap
1375 	 with the correct value.  */
1376       if (badness < n->get_key ())
1377 	{
1378 	  if (dump_file && (dump_flags & TDF_DETAILS))
1379 	    {
1380 	      fprintf (dump_file,
1381 		       "  decreasing badness %s -> %s, %f to %f\n",
1382 		       edge->caller->dump_name (),
1383 		       edge->callee->dump_name (),
1384 		       n->get_key ().to_double (),
1385 		       badness.to_double ());
1386 	    }
1387 	  heap->decrease_key (n, badness);
1388 	}
1389     }
1390   else
1391     {
1392        if (dump_file && (dump_flags & TDF_DETAILS))
1393 	 {
1394 	   fprintf (dump_file,
1395 		    "  enqueuing call %s -> %s, badness %f\n",
1396 		    edge->caller->dump_name (),
1397 		    edge->callee->dump_name (),
1398 		    badness.to_double ());
1399 	 }
1400       edge->aux = heap->insert (badness, edge);
1401     }
1402 }
1403 
1404 
1405 /* NODE was inlined.
1406    All caller edges need to be reset because
1407    size estimates change.  Similarly callees need to be reset
1408    because better context may be known.  */
1409 
1410 static void
1411 reset_edge_caches (struct cgraph_node *node)
1412 {
1413   struct cgraph_edge *edge;
1414   struct cgraph_edge *e = node->callees;
1415   struct cgraph_node *where = node;
1416   struct ipa_ref *ref;
1417 
1418   if (where->inlined_to)
1419     where = where->inlined_to;
1420 
1421   reset_node_cache (where);
1422 
1423   if (edge_growth_cache != NULL)
1424     for (edge = where->callers; edge; edge = edge->next_caller)
1425       if (edge->inline_failed)
1426 	edge_growth_cache->remove (edge);
1427 
1428   FOR_EACH_ALIAS (where, ref)
1429     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1430 
1431   if (!e)
1432     return;
1433 
1434   while (true)
1435     if (!e->inline_failed && e->callee->callees)
1436       e = e->callee->callees;
1437     else
1438       {
1439 	if (edge_growth_cache != NULL && e->inline_failed)
1440 	  edge_growth_cache->remove (e);
1441 	if (e->next_callee)
1442 	  e = e->next_callee;
1443 	else
1444 	  {
1445 	    do
1446 	      {
1447 		if (e->caller == node)
1448 		  return;
1449 		e = e->caller->callers;
1450 	      }
1451 	    while (!e->next_callee);
1452 	    e = e->next_callee;
1453 	  }
1454       }
1455 }
1456 
1457 /* Recompute HEAP nodes for each caller of NODE.
1458    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1459    When CHECK_INLINABLITY_FOR is set, re-check only that edge's
1460    inlinability.  Otherwise check all edges.  */
1461 
1462 static void
1463 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1464 		    bitmap updated_nodes,
1465 		    struct cgraph_edge *check_inlinablity_for)
1466 {
1467   struct cgraph_edge *edge;
1468   struct ipa_ref *ref;
1469 
1470   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1471       || node->inlined_to)
1472     return;
1473   if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1474     return;
1475 
1476   FOR_EACH_ALIAS (node, ref)
1477     {
1478       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1479       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1480     }
1481 
1482   for (edge = node->callers; edge; edge = edge->next_caller)
1483     if (edge->inline_failed)
1484       {
1485         if (!check_inlinablity_for
1486 	    || check_inlinablity_for == edge)
1487 	  {
1488 	    if (can_inline_edge_p (edge, false)
1489 		&& want_inline_small_function_p (edge, false)
1490 		&& can_inline_edge_by_limits_p (edge, false))
1491 	      update_edge_key (heap, edge);
1492 	    else if (edge->aux)
1493 	      {
1494 		report_inline_failed_reason (edge);
1495 		heap->delete_node ((edge_heap_node_t *) edge->aux);
1496 		edge->aux = NULL;
1497 	      }
1498 	  }
1499 	else if (edge->aux)
1500 	  update_edge_key (heap, edge);
1501       }
1502 }
1503 
1504 /* Recompute HEAP nodes for each uninlined call in NODE.
1505    If UPDATE_SINCE is non-NULL, check whether edges called within that
1506    function are inlinable (typically UPDATE_SINCE is the inline clone we
1507    introduced where all edges have a new context).
1508 
1509    This is used when we know that edge badnesses are only going to increase
1510    (we introduced a new call site) and thus all we need is to insert newly
1511    created edges into the heap.  */
1512 
1513 static void
1514 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1515 		    struct cgraph_node *update_since,
1516 		    bitmap updated_nodes)
1517 {
1518   struct cgraph_edge *e = node->callees;
1519   bool check_inlinability = update_since == node;
1520 
1521   if (!e)
1522     return;
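  /* Walk NODE's body including all of its inline clones.  The
     check_inlinability flag starts out true only when UPDATE_SINCE is NODE
     itself; it is switched on while we are inside the clone of UPDATE_SINCE
     and off again when we unwind out of it, so the full re-check is limited
     to edges whose context has actually changed.  */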
1523   while (true)
1524     if (!e->inline_failed && e->callee->callees)
1525       {
1526 	if (e->callee == update_since)
1527 	  check_inlinability = true;
1528         e = e->callee->callees;
1529       }
1530     else
1531       {
1532 	enum availability avail;
1533 	struct cgraph_node *callee;
1534 	if (!check_inlinability)
1535 	  {
1536 	    if (e->aux
1537 		&& !bitmap_bit_p (updated_nodes,
1538 		 		  e->callee->ultimate_alias_target
1539 				    (&avail, e->caller)->get_uid ()))
1540 	      update_edge_key (heap, e);
1541 	  }
1542 	/* We do not reset the callee growth cache here.  Since we added a new
1543 	   call, growth should have just increased and consequently the badness
1544 	   metric does not need updating.  */
1545 	else if (e->inline_failed
1546 		 && (callee = e->callee->ultimate_alias_target (&avail,
1547 		  						e->caller))
1548 		 && avail >= AVAIL_AVAILABLE
1549 		 && ipa_fn_summaries->get (callee) != NULL
1550 		 && ipa_fn_summaries->get (callee)->inlinable
1551 		 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1552 	  {
1553 	    if (can_inline_edge_p (e, false)
1554 		&& want_inline_small_function_p (e, false)
1555 		&& can_inline_edge_by_limits_p (e, false))
1556 	      {
1557 		gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1558 		gcc_checking_assert (check_inlinability || e->aux);
1559 	        update_edge_key (heap, e);
1560 	      }
1561 	    else if (e->aux)
1562 	      {
1563 		report_inline_failed_reason (e);
1564 		heap->delete_node ((edge_heap_node_t *) e->aux);
1565 		e->aux = NULL;
1566 	      }
1567 	  }
1568 	/* In case we redirected to an unreachable node we only need to remove
1569 	   the fibheap entry.  */
1570 	else if (e->aux)
1571 	  {
1572 	    heap->delete_node ((edge_heap_node_t *) e->aux);
1573 	    e->aux = NULL;
1574 	  }
1575 	if (e->next_callee)
1576 	  e = e->next_callee;
1577 	else
1578 	  {
1579 	    do
1580 	      {
1581 		if (e->caller == node)
1582 		  return;
1583 		if (e->caller == update_since)
1584 		  check_inlinability = false;
1585 		e = e->caller->callers;
1586 	      }
1587 	    while (!e->next_callee);
1588 	    e = e->next_callee;
1589 	  }
1590       }
1591 }
1592 
1593 /* Enqueue all recursive calls from NODE into the priority queue, keyed by
1594    how much we want to recursively inline each call.  */
1595 
1596 static void
1597 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1598 			edge_heap_t *heap)
1599 {
1600   struct cgraph_edge *e;
1601   enum availability avail;
1602 
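  /* The heap is keyed by negated call frequency, so the min-heap hands back
     the most frequently executed recursive call sites first.  Calls are
     collected both directly in WHERE and in functions already inlined into
     it.  */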
1603   for (e = where->callees; e; e = e->next_callee)
1604     if (e->callee == node
1605 	|| (e->callee->ultimate_alias_target (&avail, e->caller) == node
1606 	    && avail > AVAIL_INTERPOSABLE))
1607       heap->insert (-e->sreal_frequency (), e);
1608   for (e = where->callees; e; e = e->next_callee)
1609     if (!e->inline_failed)
1610       lookup_recursive_calls (node, e->callee, heap);
1611 }
1612 
1613 /* Decide on recursive inlining: when the function has recursive calls,
1614    inline until the body size reaches the given limit.  If any new indirect
1615    edges are discovered in the process, add them to *NEW_EDGES, unless
1616    NEW_EDGES is NULL.  */
1617 
1618 static bool
1619 recursive_inlining (struct cgraph_edge *edge,
1620 		    vec<cgraph_edge *> *new_edges)
1621 {
1622   cgraph_node *to  = (edge->caller->inlined_to
1623 		      ? edge->caller->inlined_to : edge->caller);
1624   int limit = opt_for_fn (to->decl,
1625 			  param_max_inline_insns_recursive_auto);
1626   edge_heap_t heap (sreal::min ());
1627   struct cgraph_node *node;
1628   struct cgraph_edge *e;
1629   struct cgraph_node *master_clone = NULL, *next;
1630   int depth = 0;
1631   int n = 0;
1632 
1633   node = edge->caller;
1634   if (node->inlined_to)
1635     node = node->inlined_to;
1636 
1637   if (DECL_DECLARED_INLINE_P (node->decl))
1638     limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1639 
1640   /* Make sure that function is small enough to be considered for inlining.  */
1641   if (estimate_size_after_inlining (node, edge)  >= limit)
1642     return false;
1643   lookup_recursive_calls (node, node, &heap);
1644   if (heap.empty ())
1645     return false;
1646 
1647   if (dump_file)
1648     fprintf (dump_file,
1649 	     "  Performing recursive inlining on %s\n", node->dump_name ());
1650 
1651   /* Do the inlining and update the list of recursive calls as we go.  */
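  /* Each iteration extracts the most frequently executed recursive call,
     redirects it to MASTER_CLONE (a copy of the original, not yet modified
     body) so that growth estimates are not skewed by inlining already done,
     inlines it, and queues the recursive calls exposed inside the freshly
     inlined body.  We stop once the size limit would be exceeded.  */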
1652   while (!heap.empty ())
1653     {
1654       struct cgraph_edge *curr = heap.extract_min ();
1655       struct cgraph_node *cnode, *dest = curr->callee;
1656 
1657       if (!can_inline_edge_p (curr, true)
1658 	  || !can_inline_edge_by_limits_p (curr, true))
1659 	continue;
1660 
1661       /* MASTER_CLONE is produced when we have already started modifying
1662 	 the function.  Be sure to redirect the edge to the original body
1663 	 before estimating growth; otherwise we would be seeing growth after
1664 	 inlining into the already modified body.  */
1665       if (master_clone)
1666 	{
1667 	  curr->redirect_callee (master_clone);
1668 	  if (edge_growth_cache != NULL)
1669 	    edge_growth_cache->remove (curr);
1670 	}
1671 
1672       if (estimate_size_after_inlining (node, curr) > limit)
1673 	{
1674 	  curr->redirect_callee (dest);
1675 	  if (edge_growth_cache != NULL)
1676 	    edge_growth_cache->remove (curr);
1677 	  break;
1678 	}
1679 
1680       depth = 1;
1681       for (cnode = curr->caller;
1682 	   cnode->inlined_to; cnode = cnode->callers->caller)
1683 	if (node->decl
1684 	    == curr->callee->ultimate_alias_target ()->decl)
1685           depth++;
1686 
1687       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1688 	{
1689 	  curr->redirect_callee (dest);
1690 	  if (edge_growth_cache != NULL)
1691 	    edge_growth_cache->remove (curr);
1692 	  continue;
1693 	}
1694 
1695       if (dump_file)
1696 	{
1697 	  fprintf (dump_file,
1698 		   "   Inlining call of depth %i", depth);
1699 	  if (node->count.nonzero_p () && curr->count.initialized_p ())
1700 	    {
1701 	      fprintf (dump_file, " called approx. %.2f times per call",
1702 		       (double)curr->count.to_gcov_type ()
1703 		       / node->count.to_gcov_type ());
1704 	    }
1705 	  fprintf (dump_file, "\n");
1706 	}
1707       if (!master_clone)
1708 	{
1709 	  /* We need original clone to copy around.  */
1710 	  master_clone = node->create_clone (node->decl, node->count,
1711 	    false, vNULL, true, NULL, NULL);
1712 	  for (e = master_clone->callees; e; e = e->next_callee)
1713 	    if (!e->inline_failed)
1714 	      clone_inlined_nodes (e, true, false, NULL);
1715 	  curr->redirect_callee (master_clone);
1716 	  if (edge_growth_cache != NULL)
1717 	    edge_growth_cache->remove (curr);
1718 	}
1719 
1720       inline_call (curr, false, new_edges, &overall_size, true);
1721       reset_node_cache (node);
1722       lookup_recursive_calls (node, curr->callee, &heap);
1723       n++;
1724     }
1725 
1726   if (!heap.empty () && dump_file)
1727     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1728 
1729   if (!master_clone)
1730     return false;
1731 
1732   if (dump_enabled_p ())
1733     dump_printf_loc (MSG_NOTE, edge->call_stmt,
1734 		     "\n   Inlined %i times, "
1735 		     "body grown from size %i to %i, time %f to %f\n", n,
1736 		     ipa_size_summaries->get (master_clone)->size,
1737 		     ipa_size_summaries->get (node)->size,
1738 		     ipa_fn_summaries->get (master_clone)->time.to_double (),
1739 		     ipa_fn_summaries->get (node)->time.to_double ());
1740 
1741   /* Remove the master clone we used for inlining.  We rely on the fact that
1742      clones inlined into the master clone get queued just before the master
1743      clone, so we don't need recursion.  */
1744   for (node = symtab->first_function (); node != master_clone;
1745        node = next)
1746     {
1747       next = symtab->next_function (node);
1748       if (node->inlined_to == master_clone)
1749 	node->remove ();
1750     }
1751   master_clone->remove ();
1752   return true;
1753 }
1754 
1755 
1756 /* Given the whole-compilation-unit size estimate INSNS, compute how large we
1757    can allow the unit to grow.  */
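/* For example, assuming a unit of 100000 instructions and inline-unit-growth
   of 30, the unit may grow to 100000 * (100 + 30) / 100 = 130000 instructions;
   units smaller than large-unit-insns use that parameter as the base
   instead.  */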
1758 
1759 static int64_t
1760 compute_max_insns (cgraph_node *node, int insns)
1761 {
1762   int max_insns = insns;
1763   if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1764     max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1765 
1766   return ((int64_t) max_insns
1767 	  * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
1768 }
1769 
1770 
1771 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1772 
1773 static void
1774 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1775 {
1776   while (new_edges.length () > 0)
1777     {
1778       struct cgraph_edge *edge = new_edges.pop ();
1779 
1780       gcc_assert (!edge->aux);
1781       gcc_assert (edge->callee);
1782       if (edge->inline_failed
1783 	  && can_inline_edge_p (edge, true)
1784 	  && want_inline_small_function_p (edge, true)
1785 	  && can_inline_edge_by_limits_p (edge, true))
1786         edge->aux = heap->insert (edge_badness (edge, false), edge);
1787     }
1788 }
1789 
1790 /* Remove EDGE from the fibheap.  */
1791 
1792 static void
1793 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1794 {
1795   if (e->aux)
1796     {
1797       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1798       e->aux = NULL;
1799     }
1800 }
1801 
1802 /* Return true if speculation of edge E seems useful.
1803    If ANTICIPATE_INLINING is true, be conservative and hope that E
1804    may get inlined.  */
1805 
1806 bool
1807 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1808 {
1809   /* If we have already decided to inline the edge, it seems useful.  */
1810   if (!e->inline_failed)
1811     return true;
1812 
1813   enum availability avail;
1814   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1815 								 e->caller);
1816 
1817   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1818 
1819   if (!e->maybe_hot_p ())
1820     return false;
1821 
1822   /* See if IP optimizations found something potentially useful about the
1823      function.  For now we look only for CONST/PURE flags.  Almost everything
1824      else we propagate is useless.  */
1825   if (avail >= AVAIL_AVAILABLE)
1826     {
1827       int ecf_flags = flags_from_decl_or_type (target->decl);
1828       if (ecf_flags & ECF_CONST)
1829         {
1830 	  if (!(e->speculative_call_indirect_edge ()->indirect_info
1831 		->ecf_flags & ECF_CONST))
1832 	    return true;
1833         }
1834       else if (ecf_flags & ECF_PURE)
1835         {
1836 	  if (!(e->speculative_call_indirect_edge ()->indirect_info
1837 		->ecf_flags & ECF_PURE))
1838 	    return true;
1839         }
1840     }
1841   /* If we did not manage to inline the function nor redirect
1842      to an ipa-cp clone (which is detected by the local flag being set),
1843      it is probably pointless to inline it unless the hardware is missing
1844      an indirect call predictor.  */
1845   if (!anticipate_inlining && !target->local)
1846     return false;
1847   /* For overwritable targets there is not much to do.  */
1848   if (!can_inline_edge_p (e, false)
1849       || !can_inline_edge_by_limits_p (e, false, true))
1850     return false;
1851   /* OK, speculation seems interesting.  */
1852   return true;
1853 }
1854 
1855 /* We know that EDGE is not going to be inlined.
1856    See if we can remove speculation.  */
1857 
1858 static void
1859 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1860 {
1861   if (edge->speculative && !speculation_useful_p (edge, false))
1862     {
1863       struct cgraph_node *node = edge->caller;
1864       struct cgraph_node *where = node->inlined_to
1865 				  ? node->inlined_to : node;
1866       auto_bitmap updated_nodes;
1867 
1868       if (edge->count.ipa ().initialized_p ())
1869         spec_rem += edge->count.ipa ();
1870       cgraph_edge::resolve_speculation (edge);
1871       reset_edge_caches (where);
1872       ipa_update_overall_fn_summary (where);
1873       update_caller_keys (edge_heap, where,
1874 			  updated_nodes, NULL);
1875       update_callee_keys (edge_heap, where, NULL,
1876 			  updated_nodes);
1877     }
1878 }
1879 
1880 /* Return true if NODE should be accounted for in the overall size estimate.
1881    Skip all nodes optimized for size, so we can measure the growth of the hot
1882    part of the program regardless of the padding.  */
1883 
1884 bool
1885 inline_account_function_p (struct cgraph_node *node)
1886 {
1887    return (!DECL_EXTERNAL (node->decl)
1888 	   && !opt_for_fn (node->decl, optimize_size)
1889 	   && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1890 }
1891 
1892 /* Count the number of callers of NODE and store it into DATA (which
1893    points to an int).  Worker for cgraph_for_node_and_aliases.  */
1894 
1895 static bool
1896 sum_callers (struct cgraph_node *node, void *data)
1897 {
1898   struct cgraph_edge *e;
1899   int *num_calls = (int *)data;
1900 
1901   for (e = node->callers; e; e = e->next_caller)
1902     (*num_calls)++;
1903   return false;
1904 }
1905 
1906 /* We only propagate across edges with non-interposable callee.  */
1907 
1908 inline bool
1909 ignore_edge_p (struct cgraph_edge *e)
1910 {
1911   enum availability avail;
1912   e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1913   return (avail <= AVAIL_INTERPOSABLE);
1914 }
1915 
1916 /* We use a greedy algorithm for inlining small functions:
1917    all inline candidates are put into a prioritized heap ordered by
1918    increasing badness.
1919 
1920    The inlining of small functions is bounded by the unit growth parameters.  */
1921 
1922 static void
1923 inline_small_functions (void)
1924 {
1925   struct cgraph_node *node;
1926   struct cgraph_edge *edge;
1927   edge_heap_t edge_heap (sreal::min ());
1928   auto_bitmap updated_nodes;
1929   int min_size;
1930   auto_vec<cgraph_edge *> new_indirect_edges;
1931   int initial_size = 0;
1932   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1933   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1934   new_indirect_edges.create (8);
1935 
1936   edge_removal_hook_holder
1937     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1938 
1939   /* Compute overall unit size and other global parameters used by badness
1940      metrics.  */
1941 
1942   max_count = profile_count::uninitialized ();
1943   ipa_reduced_postorder (order, true, ignore_edge_p);
1944   free (order);
1945 
1946   FOR_EACH_DEFINED_FUNCTION (node)
1947     if (!node->inlined_to)
1948       {
1949 	if (!node->alias && node->analyzed
1950 	    && (node->has_gimple_body_p () || node->thunk)
1951 	    && opt_for_fn (node->decl, optimize))
1952 	  {
1953 	    class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1954 	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1955 
1956 	    /* Do not account external functions, they will be optimized out
1957 	       if not inlined.  Also only count the non-cold portion of program.  */
1958 	    if (inline_account_function_p (node))
1959 	      initial_size += ipa_size_summaries->get (node)->size;
1960 	    info->growth = estimate_growth (node);
1961 
1962 	    int num_calls = 0;
1963 	    node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1964 					       true);
1965 	    if (num_calls == 1)
1966 	      info->single_caller = true;
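	    /* Number the members of each non-trivial strongly connected
	       component found by ipa_reduced_postorder, so that calls staying
	       within one SCC can later be recognized and penalized (see the
	       INLINE_HINT_same_scc handling in edge_badness).  */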
1967 	    if (dfs && dfs->next_cycle)
1968 	      {
1969 		struct cgraph_node *n2;
1970 		int id = dfs->scc_no + 1;
1971 		for (n2 = node; n2;
1972 		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1973 		  if (opt_for_fn (n2->decl, optimize))
1974 		    {
1975 		      ipa_fn_summary *info2 = ipa_fn_summaries->get
1976 			 (n2->inlined_to ? n2->inlined_to : n2);
1977 		      if (info2->scc_no)
1978 			break;
1979 		      info2->scc_no = id;
1980 		    }
1981 	      }
1982 	  }
1983 
1984 	for (edge = node->callers; edge; edge = edge->next_caller)
1985 	  max_count = max_count.max (edge->count.ipa ());
1986       }
1987   ipa_free_postorder_info ();
1988   initialize_growth_caches ();
1989 
1990   if (dump_file)
1991     fprintf (dump_file,
1992 	     "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1993 	     initial_size);
1994 
1995   overall_size = initial_size;
1996   min_size = overall_size;
1997 
1998   /* Populate the heap with all edges we might inline.  */
1999 
2000   FOR_EACH_DEFINED_FUNCTION (node)
2001     {
2002       bool update = false;
2003       struct cgraph_edge *next = NULL;
2004       bool has_speculative = false;
2005 
2006       if (!opt_for_fn (node->decl, optimize))
2007 	continue;
2008 
2009       if (dump_file)
2010 	fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2011 
2012       for (edge = node->callees; edge; edge = edge->next_callee)
2013 	{
2014 	  if (edge->inline_failed
2015 	      && !edge->aux
2016 	      && can_inline_edge_p (edge, true)
2017 	      && want_inline_small_function_p (edge, true)
2018 	      && can_inline_edge_by_limits_p (edge, true)
2019 	      && edge->inline_failed)
2020 	    {
2021 	      gcc_assert (!edge->aux);
2022 	      update_edge_key (&edge_heap, edge);
2023 	    }
2024 	  if (edge->speculative)
2025 	    has_speculative = true;
2026 	}
2027       if (has_speculative)
2028 	for (edge = node->callees; edge; edge = next)
2029 	  {
2030 	    next = edge->next_callee;
2031 	    if (edge->speculative
2032 		&& !speculation_useful_p (edge, edge->aux != NULL))
2033 	      {
2034 		cgraph_edge::resolve_speculation (edge);
2035 		update = true;
2036 	      }
2037 	  }
2038       if (update)
2039 	{
2040 	  struct cgraph_node *where = node->inlined_to
2041 				      ? node->inlined_to : node;
2042 	  ipa_update_overall_fn_summary (where);
2043 	  reset_edge_caches (where);
2044           update_caller_keys (&edge_heap, where,
2045 			      updated_nodes, NULL);
2046           update_callee_keys (&edge_heap, where, NULL,
2047 			      updated_nodes);
2048           bitmap_clear (updated_nodes);
2049 	}
2050     }
2051 
2052   gcc_assert (in_lto_p
2053 	      || !(max_count > 0)
2054 	      || (profile_info && flag_branch_probabilities));
2055 
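  /* Greedy inlining loop: repeatedly take the edge with the smallest badness,
     lazily recompute that badness (re-inserting the edge if its key went
     stale), verify the unit-growth and per-edge limits, inline the call
     (handing recursive edges to the special-purpose recursive inliner), and
     update the heap keys of the edges affected by the change.  */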
2056   while (!edge_heap.empty ())
2057     {
2058       int old_size = overall_size;
2059       struct cgraph_node *where, *callee;
2060       sreal badness = edge_heap.min_key ();
2061       sreal current_badness;
2062       int growth;
2063 
2064       edge = edge_heap.extract_min ();
2065       gcc_assert (edge->aux);
2066       edge->aux = NULL;
2067       if (!edge->inline_failed || !edge->callee->analyzed)
2068 	continue;
2069 
2070       /* Be sure that the caches are kept consistent.
2071 	 This check is affected by scaling roundoff errors when IPA profile
2072 	 counts are in use, thus we skip it in that case.  */
2073       if (flag_checking && !edge->callee->count.ipa_p ()
2074 	  && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2075 	{
2076 	  sreal cached_badness = edge_badness (edge, false);
2077 
2078 	  int old_size_est = estimate_edge_size (edge);
2079 	  sreal old_time_est = estimate_edge_time (edge);
2080 	  int old_hints_est = estimate_edge_hints (edge);
2081 
2082 	  if (edge_growth_cache != NULL)
2083 	    edge_growth_cache->remove (edge);
2084 	  reset_node_cache (edge->caller->inlined_to
2085 			    ? edge->caller->inlined_to
2086 			    : edge->caller);
2087 	  gcc_assert (old_size_est == estimate_edge_size (edge));
2088 	  gcc_assert (old_time_est == estimate_edge_time (edge));
2089 	  /* FIXME:
2090 
2091 	     gcc_assert (old_hints_est == estimate_edge_hints (edge));
2092 
2093 	     fails with profile feedback because some hints depend on the
2094 	     maybe_hot_edge_p predicate, and because the callee gets inlined into
2095 	     other calls, the edge may become cold.
2096 	     This ought to be fixed by computing relative probabilities
2097 	     for the given invocation, but that will be better done once the
2098 	     whole code is converted to sreals.  Disable for now and revert to
2099 	     the "wrong" value so the enabled/disabled checking paths agree.  */
2100 	  edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2101 
2102 	  /* When updating the edge costs, we only decrease badness in the keys.
2103 	     Increases of badness are handled lazily; when we see a key with an
2104 	     out-of-date value on it, we re-insert it now.  */
2105 	  current_badness = edge_badness (edge, false);
2106 	  gcc_assert (cached_badness == current_badness);
2107 	  gcc_assert (current_badness >= badness);
2108 	}
2109       else
2110         current_badness = edge_badness (edge, false);
2111       if (current_badness != badness)
2112 	{
2113 	  if (edge_heap.min () && current_badness > edge_heap.min_key ())
2114 	    {
2115 	      edge->aux = edge_heap.insert (current_badness, edge);
2116 	      continue;
2117 	    }
2118 	  else
2119 	    badness = current_badness;
2120 	}
2121 
2122       if (!can_inline_edge_p (edge, true)
2123 	  || !can_inline_edge_by_limits_p (edge, true))
2124 	{
2125 	  resolve_noninline_speculation (&edge_heap, edge);
2126 	  continue;
2127 	}
2128 
2129       callee = edge->callee->ultimate_alias_target ();
2130       growth = estimate_edge_growth (edge);
2131       if (dump_file)
2132 	{
2133 	  fprintf (dump_file,
2134 		   "\nConsidering %s with %i size\n",
2135 		   callee->dump_name (),
2136 		   ipa_size_summaries->get (callee)->size);
2137 	  fprintf (dump_file,
2138 		   " to be inlined into %s in %s:%i\n"
2139 		   " Estimated badness is %f, frequency %.2f.\n",
2140 		   edge->caller->dump_name (),
2141 		   edge->call_stmt
2142 		   && (LOCATION_LOCUS (gimple_location ((const gimple *)
2143 							edge->call_stmt))
2144 		       > BUILTINS_LOCATION)
2145 		   ? gimple_filename ((const gimple *) edge->call_stmt)
2146 		   : "unknown",
2147 		   edge->call_stmt
2148 		   ? gimple_lineno ((const gimple *) edge->call_stmt)
2149 		   : -1,
2150 		   badness.to_double (),
2151 		   edge->sreal_frequency ().to_double ());
2152 	  if (edge->count.ipa ().initialized_p ())
2153 	    {
2154 	      fprintf (dump_file, " Called ");
2155 	      edge->count.ipa ().dump (dump_file);
2156 	      fprintf (dump_file, " times\n");
2157             }
2158 	  if (dump_flags & TDF_DETAILS)
2159 	    edge_badness (edge, true);
2160 	}
2161 
2162       where = edge->caller;
2163 
2164       if (overall_size + growth > compute_max_insns (where, min_size)
2165 	  && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2166 	{
2167 	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2168 	  report_inline_failed_reason (edge);
2169 	  resolve_noninline_speculation (&edge_heap, edge);
2170 	  continue;
2171 	}
2172 
2173       if (!want_inline_small_function_p (edge, true))
2174 	{
2175 	  resolve_noninline_speculation (&edge_heap, edge);
2176 	  continue;
2177 	}
2178 
2179       profile_count old_count = callee->count;
2180 
2181       /* Heuristics for inlining small functions work poorly for
2182 	 recursive calls where the effect is similar to loop unrolling.
2183 	 When inlining such an edge seems profitable, leave the decision to
2184 	 the special-purpose recursive inliner.  */
2185       if (edge->recursive_p ())
2186 	{
2187 	  if (where->inlined_to)
2188 	    where = where->inlined_to;
2189 	  if (!recursive_inlining (edge,
2190 				   opt_for_fn (edge->caller->decl,
2191 					       flag_indirect_inlining)
2192 				   ? &new_indirect_edges : NULL))
2193 	    {
2194 	      edge->inline_failed = CIF_RECURSIVE_INLINING;
2195 	      resolve_noninline_speculation (&edge_heap, edge);
2196 	      continue;
2197 	    }
2198 	  reset_edge_caches (where);
2199 	  /* Recursive inliner inlines all recursive calls of the function
2200 	     at once. Consequently we need to update all callee keys.  */
2201 	  if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2202 	    add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2203           update_callee_keys (&edge_heap, where, where, updated_nodes);
2204 	  bitmap_clear (updated_nodes);
2205 	}
2206       else
2207 	{
2208 	  struct cgraph_node *outer_node = NULL;
2209 	  int depth = 0;
2210 
2211 	  /* Consider the case where a self-recursive function A is inlined
2212 	     into B.  This is a desired optimization in some cases, since it
2213 	     leads to an effect similar to loop peeling and we might completely
2214 	     optimize out the recursive call.  However we must be extra
2215 	     selective.  */
2216 
2217 	  where = edge->caller;
2218 	  while (where->inlined_to)
2219 	    {
2220 	      if (where->decl == callee->decl)
2221 		outer_node = where, depth++;
2222 	      where = where->callers->caller;
2223 	    }
2224 	  if (outer_node
2225 	      && !want_inline_self_recursive_call_p (edge, outer_node,
2226 						     true, depth))
2227 	    {
2228 	      edge->inline_failed
2229 		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2230 		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2231 	      resolve_noninline_speculation (&edge_heap, edge);
2232 	      continue;
2233 	    }
2234 	  else if (depth && dump_file)
2235 	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2236 
2237 	  gcc_checking_assert (!callee->inlined_to);
2238 
2239 	  int old_size = ipa_size_summaries->get (where)->size;
2240 	  sreal old_time = ipa_fn_summaries->get (where)->time;
2241 
2242 	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2243 	  reset_edge_caches (edge->callee);
2244 	  add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2245 
2246 	  /* If the caller's size and time increased, we do not need to update
2247 	     all edges, because badness is not going to decrease.  */
2248 	  if (old_size <= ipa_size_summaries->get (where)->size
2249 	      && old_time <= ipa_fn_summaries->get (where)->time
2250 	      /* Wrapper penalty may be non-monotonic in this respect.
2251 	         Fortunately it only affects small functions.  */
2252 	      && !wrapper_heuristics_may_apply (where, old_size))
2253 	    update_callee_keys (&edge_heap, edge->callee, edge->callee,
2254 			   	updated_nodes);
2255 	  else
2256 	    update_callee_keys (&edge_heap, where,
2257 				edge->callee,
2258 			   	updated_nodes);
2259 	}
2260       where = edge->caller;
2261       if (where->inlined_to)
2262 	where = where->inlined_to;
2263 
2264       /* Our profitability metric can depend on local properties
2265 	 such as the number of inlinable calls and the size of the function
2266 	 body.  After inlining, these properties might change for the function
2267 	 we inlined into (since its body size changed) and for the functions
2268 	 called by the function we inlined (since the number of their inlinable
2269 	 callers might change).  */
2270       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2271       /* Offline copy count has possibly changed, recompute if profile is
2272 	 available.  */
2273       struct cgraph_node *n
2274 	      = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2275       if (n != edge->callee && n->analyzed && !(n->count == old_count)
2276 	  && n->count.ipa_p ())
2277 	update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2278       bitmap_clear (updated_nodes);
2279 
2280       if (dump_enabled_p ())
2281 	{
2282 	  ipa_fn_summary *s = ipa_fn_summaries->get (where);
2283 
2284 	  /* dump_printf can't handle %+i.  */
2285 	  char buf_net_change[100];
2286 	  snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2287 		    overall_size - old_size);
2288 
2289 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2290 			   " Inlined %C into %C which now has time %f and "
2291 			   "size %i, net change of %s%s.\n",
2292 			   edge->callee, edge->caller,
2293 			   s->time.to_double (),
2294 			   ipa_size_summaries->get (edge->caller)->size,
2295 			   buf_net_change,
2296 			   cross_module_call_p (edge) ? " (cross module)":"");
2297 	}
2298       if (min_size > overall_size)
2299 	{
2300 	  min_size = overall_size;
2301 
2302 	  if (dump_file)
2303 	    fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2304 	}
2305     }
2306 
2307   free_growth_caches ();
2308   if (dump_enabled_p ())
2309     dump_printf (MSG_NOTE,
2310 		 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2311 		 initial_size, overall_size,
2312 		 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2313   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2314 }
2315 
2316 /* Flatten NODE.  Performed both during early inlining and
2317    at IPA inlining time.  */
2318 
2319 static void
2320 flatten_function (struct cgraph_node *node, bool early, bool update)
2321 {
2322   struct cgraph_edge *e;
2323 
2324   /* We shouldn't be called recursively when we are being processed.  */
2325   gcc_assert (node->aux == NULL);
2326 
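  /* The aux pointer serves as an "in progress" marker; finding it set on a
     callee below means we have walked into a cycle.  */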
2327   node->aux = (void *) node;
2328 
2329   for (e = node->callees; e; e = e->next_callee)
2330     {
2331       struct cgraph_node *orig_callee;
2332       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2333 
2334       /* We've hit a cycle?  It is time to give up.  */
2335       if (callee->aux)
2336 	{
2337 	  if (dump_enabled_p ())
2338 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2339 			     "Not inlining %C into %C to avoid cycle.\n",
2340 			     callee, e->caller);
2341 	  if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2342 	    e->inline_failed = CIF_RECURSIVE_INLINING;
2343 	  continue;
2344 	}
2345 
2346       /* When the edge is already inlined, we just need to recurse into
2347 	 it in order to fully flatten the leaves.  */
2348       if (!e->inline_failed)
2349 	{
2350 	  flatten_function (callee, early, false);
2351 	  continue;
2352 	}
2353 
2354       /* The flatten attribute needs to be processed during late inlining.
2355 	 For extra code quality, however, we do flattening during early
2356 	 optimization too.  */
2357       if (!early
2358 	  ? !can_inline_edge_p (e, true)
2359 	    && !can_inline_edge_by_limits_p (e, true)
2360 	  : !can_early_inline_edge_p (e))
2361 	continue;
2362 
2363       if (e->recursive_p ())
2364 	{
2365 	  if (dump_enabled_p ())
2366 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2367 			     "Not inlining: recursive call.\n");
2368 	  continue;
2369 	}
2370 
2371       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2372 	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2373 	{
2374 	  if (dump_enabled_p ())
2375 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2376 			     "Not inlining: SSA form does not match.\n");
2377 	  continue;
2378 	}
2379 
2380       /* Inline the edge and flatten the inline clone.  Avoid
2381          recursing through the original node if the node was cloned.  */
2382       if (dump_enabled_p ())
2383 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2384 			 " Inlining %C into %C.\n",
2385 			 callee, e->caller);
2386       orig_callee = callee;
2387       inline_call (e, true, NULL, NULL, false);
2388       if (e->callee != orig_callee)
2389 	orig_callee->aux = (void *) node;
2390       flatten_function (e->callee, early, false);
2391       if (e->callee != orig_callee)
2392 	orig_callee->aux = NULL;
2393     }
2394 
2395   node->aux = NULL;
2396   cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2397   if (update && opt_for_fn (where->decl, optimize))
2398     ipa_update_overall_fn_summary (where);
2399 }
2400 
2401 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2402    DATA points to the number of calls originally found so we avoid infinite
2403    recursion.  */
2404 
2405 static bool
2406 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2407 			 hash_set<cgraph_node *> *callers)
2408 {
2409   int *num_calls = (int *)data;
2410   bool callee_removed = false;
2411 
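  /* Keep inlining the first remaining caller edge; inline_call removes it
     from node->callers.  Give up if an uninlinable or recursive caller is
     found, if more calls than originally counted show up, or if the callee
     symbol itself got removed.  */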
2412   while (node->callers && !node->inlined_to)
2413     {
2414       struct cgraph_node *caller = node->callers->caller;
2415 
2416       if (!can_inline_edge_p (node->callers, true)
2417 	  || !can_inline_edge_by_limits_p (node->callers, true)
2418 	  || node->callers->recursive_p ())
2419 	{
2420 	  if (dump_file)
2421 	    fprintf (dump_file, "Uninlinable call found; giving up.\n");
2422 	  *num_calls = 0;
2423 	  return false;
2424 	}
2425 
2426       if (dump_file)
2427 	{
2428 	  cgraph_node *ultimate = node->ultimate_alias_target ();
2429 	  fprintf (dump_file,
2430 		   "\nInlining %s size %i.\n",
2431 		   ultimate->dump_name (),
2432 		   ipa_size_summaries->get (ultimate)->size);
2433 	  fprintf (dump_file,
2434 		   " Called once from %s %i insns.\n",
2435 		   node->callers->caller->dump_name (),
2436 		   ipa_size_summaries->get (node->callers->caller)->size);
2437 	}
2438 
2439       /* Remember which callers we inlined to, delaying updating the
2440 	 overall summary.  */
2441       callers->add (node->callers->caller);
2442       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2443       if (dump_file)
2444 	fprintf (dump_file,
2445 		 " Inlined into %s which now has %i size\n",
2446 		 caller->dump_name (),
2447 		 ipa_size_summaries->get (caller)->size);
2448       if (!(*num_calls)--)
2449 	{
2450 	  if (dump_file)
2451 	    fprintf (dump_file, "New calls found; giving up.\n");
2452 	  return callee_removed;
2453 	}
2454       if (callee_removed)
2455 	return true;
2456     }
2457   return false;
2458 }
2459 
2460 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2461    update.  */
2462 
2463 static bool
2464 inline_to_all_callers (struct cgraph_node *node, void *data)
2465 {
2466   hash_set<cgraph_node *> callers;
2467   bool res = inline_to_all_callers_1 (node, data, &callers);
2468   /* Perform the delayed update of the overall summary of all callers
2469      processed.  This avoids quadratic behavior in the cases where
2470      we have a lot of calls to the same function.  */
2471   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2472        i != callers.end (); ++i)
2473     ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2474   return res;
2475 }
2476 
2477 /* Output overall time estimate.  */
2478 static void
2479 dump_overall_stats (void)
2480 {
2481   sreal sum_weighted = 0, sum = 0;
2482   struct cgraph_node *node;
2483 
2484   FOR_EACH_DEFINED_FUNCTION (node)
2485     if (!node->inlined_to
2486 	&& !node->alias)
2487       {
2488 	ipa_fn_summary *s = ipa_fn_summaries->get (node);
2489 	if (s != NULL)
2490 	  {
2491 	  sum += s->time;
2492 	  if (node->count.ipa ().initialized_p ())
2493 	    sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2494 	  }
2495       }
2496   fprintf (dump_file, "Overall time estimate: "
2497 	   "%f weighted by profile: "
2498 	   "%f\n", sum.to_double (), sum_weighted.to_double ());
2499 }
2500 
2501 /* Output some useful stats about inlining.  */
2502 
2503 static void
2504 dump_inline_stats (void)
2505 {
2506   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2507   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2508   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2509   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2510   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2511   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2512   int64_t reason[CIF_N_REASONS][2];
2513   sreal reason_freq[CIF_N_REASONS];
2514   int i;
2515   struct cgraph_node *node;
2516 
2517   memset (reason, 0, sizeof (reason));
2518   for (i=0; i < CIF_N_REASONS; i++)
2519     reason_freq[i] = 0;
2520   FOR_EACH_DEFINED_FUNCTION (node)
2521   {
2522     struct cgraph_edge *e;
2523     for (e = node->callees; e; e = e->next_callee)
2524       {
2525 	if (e->inline_failed)
2526 	  {
2527 	    if (e->count.ipa ().initialized_p ())
2528 	      reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2529 	    reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2530 	    reason[(int) e->inline_failed][1] ++;
2531 	    if (DECL_VIRTUAL_P (e->callee->decl)
2532 		&& e->count.ipa ().initialized_p ())
2533 	      {
2534 		if (e->indirect_inlining_edge)
2535 		  noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2536 		else
2537 		  noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2538 	      }
2539 	    else if (e->count.ipa ().initialized_p ())
2540 	      {
2541 		if (e->indirect_inlining_edge)
2542 		  noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2543 		else
2544 		  noninlined_cnt += e->count.ipa ().to_gcov_type ();
2545 	      }
2546 	  }
2547 	else if (e->count.ipa ().initialized_p ())
2548 	  {
2549 	    if (e->speculative)
2550 	      {
2551 		if (DECL_VIRTUAL_P (e->callee->decl))
2552 		  inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2553 		else
2554 		  inlined_speculative += e->count.ipa ().to_gcov_type ();
2555 	      }
2556 	    else if (DECL_VIRTUAL_P (e->callee->decl))
2557 	      {
2558 		if (e->indirect_inlining_edge)
2559 		  inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2560 		else
2561 		  inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2562 	      }
2563 	    else
2564 	      {
2565 		if (e->indirect_inlining_edge)
2566 		  inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2567 		else
2568 		  inlined_cnt += e->count.ipa ().to_gcov_type ();
2569 	      }
2570 	  }
2571       }
2572     for (e = node->indirect_calls; e; e = e->next_callee)
2573       if (e->indirect_info->polymorphic
2574 	  & e->count.ipa ().initialized_p ())
2575 	indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2576       else if (e->count.ipa ().initialized_p ())
2577 	indirect_cnt += e->count.ipa ().to_gcov_type ();
2578   }
2579   if (max_count.initialized_p ())
2580     {
2581       fprintf (dump_file,
2582 	       "Inlined %" PRId64 " + speculative "
2583 	       "%" PRId64 " + speculative polymorphic "
2584 	       "%" PRId64 " + previously indirect "
2585 	       "%" PRId64 " + virtual "
2586 	       "%" PRId64 " + virtual and previously indirect "
2587 	       "%" PRId64 "\n" "Not inlined "
2588 	       "%" PRId64 " + previously indirect "
2589 	       "%" PRId64 " + virtual "
2590 	       "%" PRId64 " + virtual and previously indirect "
2591 	       "%" PRId64 " + still indirect "
2592 	       "%" PRId64 " + still indirect polymorphic "
2593 	       "%" PRId64 "\n", inlined_cnt,
2594 	       inlined_speculative, inlined_speculative_ply,
2595 	       inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2596 	       noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2597 	       noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2598       fprintf (dump_file, "Removed speculations ");
2599       spec_rem.dump (dump_file);
2600       fprintf (dump_file, "\n");
2601     }
2602   dump_overall_stats ();
2603   fprintf (dump_file, "\nWhy inlining failed?\n");
2604   for (i = 0; i < CIF_N_REASONS; i++)
2605     if (reason[i][1])
2606       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2607 	       cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2608 	       (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2609 }
2610 
2611 /* Called when node is removed.  */
2612 
2613 static void
2614 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2615 {
2616   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2617     return;
2618 
2619   hash_set<struct cgraph_node *> *removed
2620     = (hash_set<struct cgraph_node *> *) data;
2621   removed->add (node);
2622 }
2623 
2624 /* Decide on the inlining.  We do so in topological order to avoid the
2625    expense of updating data structures.  */
2626 
2627 static unsigned int
2628 ipa_inline (void)
2629 {
2630   struct cgraph_node *node;
2631   int nnodes;
2632   struct cgraph_node **order;
2633   int i, j;
2634   int cold;
2635   bool remove_functions = false;
2636 
2637   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2638 
2639   if (dump_file)
2640     ipa_dump_fn_summaries (dump_file);
2641 
2642   nnodes = ipa_reverse_postorder (order);
2643   spec_rem = profile_count::zero ();
2644 
2645   FOR_EACH_FUNCTION (node)
2646     {
2647       node->aux = 0;
2648 
2649       /* Recompute the default reasons for inlining because they may have
2650 	 changed during merging.  */
2651       if (in_lto_p)
2652 	{
2653 	  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2654 	    {
2655 	      gcc_assert (e->inline_failed);
2656 	      initialize_inline_failed (e);
2657 	    }
2658 	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2659 	    initialize_inline_failed (e);
2660 	}
2661     }
2662 
2663   if (dump_file)
2664     fprintf (dump_file, "\nFlattening functions:\n");
2665 
2666   /* First shrink order array, so that it only contains nodes with
2667      flatten attribute.  */
2668   for (i = nnodes - 1, j = i; i >= 0; i--)
2669     {
2670       node = order[i];
2671       if (node->definition
2672 	  /* Do not try to flatten aliases.  These may happen for example when
2673 	     creating local aliases.  */
2674 	  && !node->alias
2675 	  && lookup_attribute ("flatten",
2676 			       DECL_ATTRIBUTES (node->decl)) != NULL)
2677 	order[j--] = order[i];
2678     }
2679 
2680   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2681      nodes with flatten attribute.  If there is more than one such
2682      node, we need to register a node removal hook, as flatten_function
2683      could remove other nodes with flatten attribute.  See PR82801.  */
2684   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2685   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2686   if (j < nnodes - 2)
2687     {
2688       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2689       node_removal_hook_holder
2690 	= symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2691 					   flatten_removed_nodes);
2692     }
2693 
2694   /* In the first pass handle functions to be flattened.  Do this with
2695      a priority so none of our later choices will make this impossible.  */
2696   for (i = nnodes - 1; i > j; i--)
2697     {
2698       node = order[i];
2699       if (flatten_removed_nodes
2700 	  && flatten_removed_nodes->contains (node))
2701 	continue;
2702 
2703       /* Handle nodes to be flattened.
2704 	 Ideally when processing callees we stop inlining at the
2705 	 entry of cycles, possibly cloning that entry point and
2706 	 trying to flatten the clone, turning it into a self-recursive
2707 	 function.  */
2708       if (dump_file)
2709 	fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2710       flatten_function (node, false, true);
2711     }
2712 
2713   if (j < nnodes - 2)
2714     {
2715       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2716       delete flatten_removed_nodes;
2717     }
2718   free (order);
2719 
2720   if (dump_file)
2721     dump_overall_stats ();
2722 
2723   inline_small_functions ();
2724 
2725   gcc_assert (symtab->state == IPA_SSA);
2726   symtab->state = IPA_SSA_AFTER_INLINING;
2727   /* Do first after-inlining removal.  We want to remove all "stale" extern
2728      inline functions and virtual functions so we really know what is called
2729      once.  */
2730   symtab->remove_unreachable_nodes (dump_file);
2731 
2732   /* Inline functions with a property that after inlining into all callers the
2733      code size will shrink because the out-of-line copy is eliminated.
2734      We do this regardless of the callee size as long as function growth limits
2735      are met.  */
2736   if (dump_file)
2737     fprintf (dump_file,
2738 	     "\nDeciding on functions to be inlined into all callers and "
2739 	     "removing useless speculations:\n");
2740 
2741   /* Inlining one function called once has a good chance of preventing
2742      inlining of another function into the same callee.  Ideally we should
2743      work in priority order, but probably inlining hot functions first
2744      is a good cut without the extra pain of maintaining the queue.
2745 
2746      ??? This does not really fit the bill perfectly: inlining a function
2747      into its callee often leads to better optimization of the callee due to
2748      increased context for optimization.
2749      For example if the main() function calls a function that outputs help
2750      and then a function that does the main optimization, we should inline
2751      the second with priority even if both calls are cold by themselves.
2752 
2753      We probably want to implement a new predicate replacing our use of
2754      maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2755      to be hot.  */
2756   for (cold = 0; cold <= 1; cold ++)
2757     {
2758       FOR_EACH_DEFINED_FUNCTION (node)
2759 	{
2760 	  struct cgraph_edge *edge, *next;
2761 	  bool update=false;
2762 
2763 	  if (!opt_for_fn (node->decl, optimize)
2764 	      || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2765 	    continue;
2766 
2767 	  for (edge = node->callees; edge; edge = next)
2768 	    {
2769 	      next = edge->next_callee;
2770 	      if (edge->speculative && !speculation_useful_p (edge, false))
2771 		{
2772 		  if (edge->count.ipa ().initialized_p ())
2773 		    spec_rem += edge->count.ipa ();
2774 		  cgraph_edge::resolve_speculation (edge);
2775 		  update = true;
2776 		  remove_functions = true;
2777 		}
2778 	    }
2779 	  if (update)
2780 	    {
2781 	      struct cgraph_node *where = node->inlined_to
2782 					  ? node->inlined_to : node;
2783 	      reset_edge_caches (where);
2784 	      ipa_update_overall_fn_summary (where);
2785 	    }
2786 	  if (want_inline_function_to_all_callers_p (node, cold))
2787 	    {
2788 	      int num_calls = 0;
2789 	      node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2790 						 true);
2791 	      while (node->call_for_symbol_and_aliases
2792 		       (inline_to_all_callers, &num_calls, true))
2793 		;
2794 	      remove_functions = true;
2795 	    }
2796 	}
2797     }
2798 
2799   if (dump_enabled_p ())
2800     dump_printf (MSG_NOTE,
2801 		 "\nInlined %i calls, eliminated %i functions\n\n",
2802 		 ncalls_inlined, nfunctions_inlined);
2803   if (dump_file)
2804     dump_inline_stats ();
2805 
2806   if (dump_file)
2807     ipa_dump_fn_summaries (dump_file);
2808   return remove_functions ? TODO_remove_functions : 0;
2809 }
2810 
2811 /* Inline always-inline function calls in NODE.  */
2812 
2813 static bool
2814 inline_always_inline_functions (struct cgraph_node *node)
2815 {
2816   struct cgraph_edge *e;
2817   bool inlined = false;
2818 
2819   for (e = node->callees; e; e = e->next_callee)
2820     {
2821       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2822       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2823 	continue;
2824 
2825       if (e->recursive_p ())
2826 	{
2827 	  if (dump_enabled_p ())
2828 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2829 			     "  Not inlining recursive call to %C.\n",
2830 			     e->callee);
2831 	  e->inline_failed = CIF_RECURSIVE_INLINING;
2832 	  continue;
2833 	}
2834 
2835       if (!can_early_inline_edge_p (e))
2836 	{
2837 	  /* Set inlined to true if the callee is marked "always_inline" but
2838 	     is not inlinable.  This will allow flagging an error later in
2839 	     expand_call_inline in tree-inline.c.  */
2840 	  if (lookup_attribute ("always_inline",
2841 				 DECL_ATTRIBUTES (callee->decl)) != NULL)
2842 	    inlined = true;
2843 	  continue;
2844 	}
2845 
2846       if (dump_enabled_p ())
2847 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2848 			 "  Inlining %C into %C (always_inline).\n",
2849 			 e->callee, e->caller);
2850       inline_call (e, true, NULL, NULL, false);
2851       inlined = true;
2852     }
2853   if (inlined)
2854     ipa_update_overall_fn_summary (node);
2855 
2856   return inlined;
2857 }
2858 
2859 /* Decide on the inlining.  We do so in topological order to avoid the
2860    expense of updating data structures.  */
2861 
2862 static bool
2863 early_inline_small_functions (struct cgraph_node *node)
2864 {
2865   struct cgraph_edge *e;
2866   bool inlined = false;
2867 
2868   for (e = node->callees; e; e = e->next_callee)
2869     {
2870       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2871 
2872       /* We can encounter a not-yet-analyzed function during
2873 	 early inlining on callgraphs with strongly
2874 	 connected components.  */
2875       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2876       if (s == NULL || !s->inlinable || !e->inline_failed)
2877 	continue;
2878 
2879       /* Do not consider functions not declared inline.  */
2880       if (!DECL_DECLARED_INLINE_P (callee->decl)
2881 	  && !opt_for_fn (node->decl, flag_inline_small_functions)
2882 	  && !opt_for_fn (node->decl, flag_inline_functions))
2883 	continue;
2884 
2885       if (dump_enabled_p ())
2886 	dump_printf_loc (MSG_NOTE, e->call_stmt,
2887 			 "Considering inline candidate %C.\n",
2888 			 callee);
2889 
2890       if (!can_early_inline_edge_p (e))
2891 	continue;
2892 
2893       if (e->recursive_p ())
2894 	{
2895 	  if (dump_enabled_p ())
2896 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2897 			     "  Not inlining: recursive call.\n");
2898 	  continue;
2899 	}
2900 
2901       if (!want_early_inline_function_p (e))
2902 	continue;
2903 
2904       if (dump_enabled_p ())
2905 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2906 			 " Inlining %C into %C.\n",
2907 			 callee, e->caller);
2908       inline_call (e, true, NULL, NULL, false);
2909       inlined = true;
2910     }
2911 
2912   if (inlined)
2913     ipa_update_overall_fn_summary (node);
2914 
2915   return inlined;
2916 }
2917 
2918 unsigned int
2919 early_inliner (function *fun)
2920 {
2921   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2922   struct cgraph_edge *edge;
2923   unsigned int todo = 0;
2924   int iterations = 0;
2925   bool inlined = false;
2926 
2927   if (seen_error ())
2928     return 0;
2929 
2930   /* Do nothing if the data structures for the IPA inliner are already
2931      computed.  This happens when some pass decides to construct a new
2932      function and cgraph_add_new_function calls lowering passes and early
2933      optimization on it.  This may confuse us when the early inliner decides
2934      to inline a call to a function clone, because function clones don't have
2935      a parameter list in ipa-prop matching their signature.  */
2936   if (ipa_node_params_sum)
2937     return 0;
2938 
2939   if (flag_checking)
2940     node->verify ();
2941   node->remove_all_references ();
2942 
2943   /* Even when not optimizing or not inlining, inline always-inline
2944      functions.  */
2945   inlined = inline_always_inline_functions (node);
2946 
2947   if (!optimize
2948       || flag_no_inline
2949       || !flag_early_inlining
2950       /* Never inline regular functions into always-inline functions
2951 	 during incremental inlining.  This sucks as functions calling
2952 	 always inline functions will get less optimized, but at the
2953 	 same time inlining of functions calling always inline
2954 	 function into an always inline function might introduce
2955 	 cycles of edges to be always inlined in the callgraph.
2956 
2957 	 We might want to be smarter and just avoid this type of inlining.  */
2958       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2959 	  && lookup_attribute ("always_inline",
2960 			       DECL_ATTRIBUTES (node->decl))))
2961     ;
2962   else if (lookup_attribute ("flatten",
2963 			     DECL_ATTRIBUTES (node->decl)) != NULL)
2964     {
2965       /* When the function is marked to be flattened, recursively inline
2966 	 all calls in it.  */
2967       if (dump_enabled_p ())
2968 	dump_printf (MSG_OPTIMIZED_LOCATIONS,
2969 		     "Flattening %C\n", node);
2970       flatten_function (node, true, true);
2971       inlined = true;
2972     }
2973   else
2974     {
2975       /* If some always_inline functions were inlined, apply the changes.
2976 	 This way we will not account always-inlines in the growth limits, and
2977 	 moreover we will inline calls from always-inlines that we previously
2978 	 skipped because of the conditional above.  */
2979       if (inlined)
2980 	{
2981 	  timevar_push (TV_INTEGRATION);
2982 	  todo |= optimize_inline_calls (current_function_decl);
2983 	  /* optimize_inline_calls call above might have introduced new
2984 	     statements that don't have inline parameters computed.  */
2985 	  for (edge = node->callees; edge; edge = edge->next_callee)
2986 	    {
2987 	      /* We can enounter not-yet-analyzed function during
2988 		 early inlining on callgraphs with strongly
2989 		 connected components.  */
2990 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2991 	      es->call_stmt_size
2992 		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
2993 	      es->call_stmt_time
2994 		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
2995 	    }
2996 	  ipa_update_overall_fn_summary (node);
2997 	  inlined = false;
2998 	  timevar_pop (TV_INTEGRATION);
2999 	}
3000       /* We iterate incremental inlining to get trivial cases of indirect
3001 	 inlining.  */
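      /* Illustrative sketch (hypothetical example, not from the original
	 sources) of the indirect inlining the iteration below catches:

	   static void worker (void) { }
	   static void run (void (*fp) (void)) { fp (); }
	   void caller (void) { run (worker); }

	 Inlining run into caller turns the indirect call through FP into a
	 direct call to worker, which the next iteration can then consider
	 for inlining as well.  */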
      while (iterations < opt_for_fn (node->decl,
				      param_early_inliner_max_iterations)
	     && early_inline_small_functions (node))
	{
	  timevar_push (TV_INTEGRATION);
	  todo |= optimize_inline_calls (current_function_decl);

	  /* Technically we ought to recompute inline parameters so the new
	     iteration of the early inliner works as expected.  However, the
	     values are approximately right, so we only need to update the
	     edge info that might be cleared out for newly discovered
	     edges.  */
	  for (edge = node->callees; edge; edge = edge->next_callee)
	    {
	      /* We have no summary for new bound store calls yet.  */
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	      es->call_stmt_size
		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
	      es->call_stmt_time
		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
	    }
	  if (iterations < opt_for_fn (node->decl,
				       param_early_inliner_max_iterations) - 1)
	    ipa_update_overall_fn_summary (node);
	  timevar_pop (TV_INTEGRATION);
	  iterations++;
	  inlined = false;
	}
      if (dump_file)
	fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  fun->always_inline_functions_inlined = true;

  return todo;
}

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function
   calls.  */
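
/* Illustrative note (hypothetical example, not from the original sources):
   a small accessor such as

     static inline int
     get_x (const struct point *p) { return p->x; }

   is a typical candidate here; inlining it early lets profiling and later
   passes see the field load directly instead of a call.  */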

namespace {

const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_early_inline

unsigned int
pass_early_inline::execute (function *fun)
{
  return early_inliner (fun);
}

} // anon namespace

gimple_opt_pass *
make_pass_early_inline (gcc::context *ctxt)
{
  return new pass_early_inline (ctxt);
}
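
/* Note (illustration, not from the original sources): the factory above is
   what the pass manager uses to instantiate the pass; the GIMPLE pass
   pipeline is described in passes.def, where this pass is expected to be
   listed along the lines of

     NEXT_PASS (pass_early_inline);

   among the early local optimization passes.  */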

namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
		      NULL, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      inline_transform, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_inline (gcc::context *ctxt)
{
  return new pass_ipa_inline (ctxt);
}
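
/* Note (illustration, not from the original sources): like the early pass
   above, this IPA pass is instantiated through its factory by the pass
   manager; it would be listed among the regular IPA passes in passes.def,
   roughly as

     NEXT_PASS (pass_ipa_inline);

   Its per-function work is done by inline_transform, registered above as
   the function_transform hook.  */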