1 /* Function summary pass.
2    Copyright (C) 2003-2020 Free Software Foundation, Inc.
3    Contributed by Jan Hubicka
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Analysis of function bodies used by inter-procedural passes
22 
23    We estimate for each function
24      - function body size and size after specializing into given context
25      - average function execution time in a given context
26      - function frame size
27    For each call
28      - call statement size, time and how often the parameters change
29 
30    ipa_fn_summary data structures store above information locally (i.e.
31    parameters of the function itself) and globally (i.e. parameters of
32    the function created by applying all the inline decisions already
33    present in the callgraph).
34 
35    We provide access to the ipa_fn_summary data structure and
36    basic logic updating the parameters when inlining is performed.
37 
38    The summaries are context sensitive.  Context means
39      1) partial assignment of known constant values of operands
40      2) whether function is inlined into the call or not.
41    It is easy to add more variants.  To represent function size and time
42    that depends on context (i.e. it is known to be optimized away when
43    context is known either by inlining or from IP-CP and cloning),
44    we use predicates.
45 
46    estimate_edge_size_and_time can be used to query
47    function size/time in the given context.  ipa_merge_fn_summary_after_inlining merges
48    properties of caller and callee after inlining.
49 
   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
53 
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "cfganal.h"
71 #include "gimple-iterator.h"
72 #include "tree-cfg.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
76 #include "ipa-prop.h"
77 #include "ipa-fnsummary.h"
78 #include "cfgloop.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
82 #include "gimplify.h"
83 #include "stringpool.h"
84 #include "attribs.h"
85 #include "tree-into-ssa.h"
86 
/* Summaries.  The function summary is GC allocated (va_gc) because it is
   streamed and kept across passes; the size and call summaries live on the
   heap.  */
fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;

/* Edge predicates go here.  Pool allocated since most edges have no
   interesting (non-trivial) predicate at all.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");
94 
95 
96 /* Dump IPA hints.  */
97 void
ipa_dump_hints(FILE * f,ipa_hints hints)98 ipa_dump_hints (FILE *f, ipa_hints hints)
99 {
100   if (!hints)
101     return;
102   fprintf (f, "IPA hints:");
103   if (hints & INLINE_HINT_indirect_call)
104     {
105       hints &= ~INLINE_HINT_indirect_call;
106       fprintf (f, " indirect_call");
107     }
108   if (hints & INLINE_HINT_loop_iterations)
109     {
110       hints &= ~INLINE_HINT_loop_iterations;
111       fprintf (f, " loop_iterations");
112     }
113   if (hints & INLINE_HINT_loop_stride)
114     {
115       hints &= ~INLINE_HINT_loop_stride;
116       fprintf (f, " loop_stride");
117     }
118   if (hints & INLINE_HINT_same_scc)
119     {
120       hints &= ~INLINE_HINT_same_scc;
121       fprintf (f, " same_scc");
122     }
123   if (hints & INLINE_HINT_in_scc)
124     {
125       hints &= ~INLINE_HINT_in_scc;
126       fprintf (f, " in_scc");
127     }
128   if (hints & INLINE_HINT_cross_module)
129     {
130       hints &= ~INLINE_HINT_cross_module;
131       fprintf (f, " cross_module");
132     }
133   if (hints & INLINE_HINT_declared_inline)
134     {
135       hints &= ~INLINE_HINT_declared_inline;
136       fprintf (f, " declared_inline");
137     }
138   if (hints & INLINE_HINT_known_hot)
139     {
140       hints &= ~INLINE_HINT_known_hot;
141       fprintf (f, " known_hot");
142     }
143   gcc_assert (!hints);
144 }
145 
146 
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.
   If CALL is true account to call_size_time_table rather than
   size_time_table.   */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in,
				   bool call)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;
  vec<size_time_entry, va_gc> *table = call
	 			       ? call_size_time_table : size_time_table;

  /* Code never executed contributes nothing.  */
  if (exec_pred == false)
    return;

  /* Code can only be non-constant when it is executed at all; conjunct
     the two predicates.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && table)
    return;

  /* Only for calls we are unaccounting what we previously recorded.  */
  gcc_checking_assert (time >= 0 || call);

  /* Look for an existing entry with the same predicate pair so repeated
     contributions accumulate rather than growing the table.  */
  for (i = 0; vec_safe_iterate (table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == max_size_time_table_size)
    {
      /* Table is full: fold everything further into entry 0 (the
	 unconditional one) to keep the table bounded.  */
      i = 0;
      found = true;
      e = &(*table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
          fprintf (dump_file, " nonconst:");
          nonconst_pred.dump (dump_file, conds);
	}
      else
        fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      if (call)
        vec_safe_push (call_size_time_table, new_entry);
      else
        vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
      /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
      /* Tolerate small roundoff issues.  */
      if (e->time < 0)
	e->time = 0;
    }
}
237 
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* If the edge was already inlined, remember the callee so its clone
     tree can be removed once the edge is redirected.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		      (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  /* Redirection differs by edge kind: resolve speculation, turn an
     indirect edge direct, or retarget a plain direct edge.  Each may
     return a different edge object, hence the reassignment of E.  */
  if (e->speculative)
    e = cgraph_edge::resolve_speculation (e, target->decl);
  else if (!e->callee)
    e = cgraph_edge::make_direct (e, target);
  else
    e->redirect_callee (target);
  /* Fetch the summary only after redirection so it belongs to the
     edge we actually return.  */
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  /* The call never executes: zero out its profile and cost.  */
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
262 
/* Set predicate for edge E.  A NULL or trivially true PREDICATE clears
   any previously stored predicate; a trivially false one redirects the
   edge to __builtin_unreachable.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
     be optimized out.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
         just once.  Do it always on the direct edge, so we do not
	 attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  /* Fetch the summary after the possible redirection above, since
     redirect_to_unreachable may have replaced E.  */
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      /* Non-trivial predicate: store a pool-allocated copy.  */
      if (!es->predicate)
	es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      /* Trivial (or no) predicate: represent it by NULL.  */
      if (es->predicate)
	edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}
292 
293 /* Set predicate for hint *P.  */
294 
295 static void
set_hint_predicate(predicate ** p,predicate new_predicate)296 set_hint_predicate (predicate **p, predicate new_predicate)
297 {
298   if (new_predicate == false || new_predicate == true)
299     {
300       if (*p)
301 	edge_predicate_pool.remove (*p);
302       *p = NULL;
303     }
304   else
305     {
306       if (!*p)
307 	*p = edge_predicate_pool.allocate ();
308       **p = new_predicate;
309     }
310 }
311 
312 
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
   copy when called in a given context.  It is a bitmask of conditions. Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differs - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests cannot use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate known offset/value set for each
   parameter.  Return clause of possible truths.  When INLINE_P is true, assume
   that we are inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<value_range> known_value_ranges,
				    vec<ipa_agg_value_set> known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  /* Start with the NOT_INLINED condition preset according to context:
     it is false when inlining for the specialized clause, but always
     set in the nonspecialized one.  */
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val = NULL;
      tree res;
      int j;
      struct expr_eval_op *op;

      /* We allow call stmt to have fewer arguments than the callee function
         (especially for K&R style programs).  So bound check here (we assume
         known_aggs vector, if non-NULL, has the same length as
         known_vals).  */
      gcc_checking_assert (!known_aggs.length () || !known_vals.length ()
			   || (known_vals.length () == known_aggs.length ()));

      if (c->agg_contents)
	{
	  struct ipa_agg_value_set *agg;

	  /* A "changed" condition on a by-value aggregate whose scalar is
	     known not to change (error_mark_node) is known false; skip it
	     so neither clause bit gets set.  */
	  if (c->code == predicate::changed
	      && !c->by_ref
	      && c->operand_num < (int)known_vals.length ()
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (c->operand_num < (int)known_aggs.length ())
	    {
	      agg = &known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg,
						c->operand_num
						   < (int) known_vals.length ()
						? known_vals[c->operand_num]
						: NULL,
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else if (c->operand_num < (int) known_vals.length ())
	{
	  val = known_vals[c->operand_num];
	  /* error_mark_node means "not changed", which is only meaningful
	     for the "changed" condition itself.  */
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      /* With no known value, "changed" and "is_not_constant" may be true
	 in both the specialized and nonspecialized copy.  */
      if (!val
	  && (c->code == predicate::changed
	      || c->code == predicate::is_not_constant))
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      /* With a known value these conditions are false in the specialized
	 copy, but the nonspecialized copy cannot rely on that.  */
      if (c->code == predicate::changed)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->code == predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Try to evaluate the condition by constant folding, applying the
	 recorded chain of unary/binary/ternary operations first.  */
      if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
	{
	  if (c->type != TREE_TYPE (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (!val)
		break;
	      if (!op->val[0])
		val = fold_unary (op->code, op->type, val);
	      else if (!op->val[1])
		val = fold_binary (op->code, op->type,
				   op->index ? op->val[0] : val,
				   op->index ? val : op->val[0]);
	      else if (op->index == 0)
		val = fold_ternary (op->code, op->type,
				    val, op->val[0], op->val[1]);
	      else if (op->index == 1)
		val = fold_ternary (op->code, op->type,
				    op->val[0], val, op->val[1]);
	      else if (op->index == 2)
		val = fold_ternary (op->code, op->type,
				    op->val[0], op->val[1], val);
	      else
		val = NULL_TREE;
	    }

	  res = val
	    ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	    : NULL;

	  /* Condition folded to false: leave both clause bits clear.  */
	  if (res && integer_zerop (res))
	    continue;
	  /* Condition folded to true: set both clause bits.  */
	  if (res && integer_onep (res))
	    {
	      clause |= 1 << (i + predicate::first_dynamic_condition);
	      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	      continue;
	    }
	}
      /* Constant folding failed (or gave no definite answer); fall back
	 to value range propagation through the operation chain.  */
      if (c->operand_num < (int) known_value_ranges.length ()
	  && !c->agg_contents
	  && !known_value_ranges[c->operand_num].undefined_p ()
	  && !known_value_ranges[c->operand_num].varying_p ()
	  && TYPE_SIZE (c->type)
		 == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
	  && (!val || TREE_CODE (val) != INTEGER_CST))
	{
	  value_range vr = known_value_ranges[c->operand_num];
	  if (!useless_type_conversion_p (c->type, vr.type ()))
	    {
	      value_range res;
	      range_fold_unary_expr (&res, NOP_EXPR,
				     c->type, &vr, vr.type ());
	      vr = res;
	    }
	  tree type = c->type;

	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (vr.varying_p () || vr.undefined_p ())
		break;

	      value_range res;
	      if (!op->val[0])
	        range_fold_unary_expr (&res, op->code, op->type, &vr, type);
	      else if (!op->val[1])
		{
		  value_range op0 (op->val[0], op->val[0]);
		  range_fold_binary_expr (&res, op->code, op->type,
					  op->index ? &op0 : &vr,
					  op->index ? &vr : &op0);
		}
	      else
		gcc_unreachable ();
	      type = op->type;
	      vr = res;
	    }
	  if (!vr.varying_p () && !vr.undefined_p ())
	    {
	      value_range res;
	      value_range val_vr (c->val, c->val);
	      range_fold_binary_expr (&res, c->code, boolean_type_node,
				      &vr,
				      &val_vr);
	      /* The range proves the condition false; skip setting bits.  */
	      if (res.zero_p ())
		continue;
	    }
	}

      /* Nothing proved the condition false; it may be true in both
	 copies.  */
      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
505 
506 /* Return true if VRP will be exectued on the function.
507    We do not want to anticipate optimizations that will not happen.
508 
509    FIXME: This can be confused with -fdisable and debug counters and thus
510    it should not be used for correctness (only to make heuristics work).
511    This means that inliner should do its own optimizations of expressions
512    that it predicts to be constant so wrong code can not be triggered by
513    builtin_constant_p.  */
514 
515 static bool
vrp_will_run_p(struct cgraph_node * node)516 vrp_will_run_p (struct cgraph_node *node)
517 {
518   return (opt_for_fn (node->decl, optimize)
519 	  && !opt_for_fn (node->decl, optimize_debug)
520 	  && opt_for_fn (node->decl, flag_tree_vrp));
521 }
522 
523 /* Similarly about FRE.  */
524 
525 static bool
fre_will_run_p(struct cgraph_node * node)526 fre_will_run_p (struct cgraph_node *node)
527 {
528   return (opt_for_fn (node->decl, optimize)
529 	  && !opt_for_fn (node->decl, optimize_debug)
530 	  && opt_for_fn (node->decl, flag_tree_fre));
531 }
532 
/* Work out what conditions might be true at invocation of E.
   Compute costs for inlined edge if INLINE_P is true.

   Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
   (if non-NULL) conditions evaluated for nonspecialized clone called
   in a given context.

   KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by
   known constant and aggregate values of parameters.

   KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts
   of parameter used by a polymorphic call.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_value_set> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  auto_vec<value_range, 32> known_value_ranges;
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;

  /* Preferred path: use IPA jump functions to derive known values,
     ranges, aggregates and polymorphic contexts for the parameters.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && (info->conds || known_contexts_ptr)
      && (args = IPA_EDGE_REF (e)) != NULL)
    {
      struct cgraph_node *caller;
      class ipa_node_params *caller_parms_info, *callee_pi = NULL;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (count)
	{
	  /* Jump functions are relative to the outermost function the
	     caller was inlined into.  */
	  if (e->caller->inlined_to)
	    caller = e->caller->inlined_to;
	  else
	    caller = e->caller;
	  caller_parms_info = IPA_NODE_REF (caller);
          callee_pi = IPA_NODE_REF (callee);

	  /* Watch for thunks.  */
	  if (callee_pi)
	    /* Watch for variadic functions.  */
	    count = MIN (count, ipa_get_param_count (callee_pi));
	}

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);

	    /* Only bother for parameters the callee's predicates or
	       indirect calls actually use.  */
	    if (ipa_is_param_used_by_indirect_call (callee_pi, i)
		|| ipa_is_param_used_by_ipa_predicates (callee_pi, i))
	      {
		/* Determine if we know constant value of the parameter.  */
		tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
						 ipa_get_type (callee_pi, i));

		/* Fall back to the literal argument of the call statement
		   when the jump function gave nothing.  */
		if (!cst && e->call_stmt
		    && i < (int)gimple_call_num_args (e->call_stmt))
		  {
		    cst = gimple_call_arg (e->call_stmt, i);
		    if (!is_gimple_min_invariant (cst))
		      cst = NULL;
		  }
		if (cst)
		  {
		    gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		    /* Vectors are grown lazily on first hit.  */
		    if (!known_vals_ptr->length ())
		      vec_safe_grow_cleared (known_vals_ptr, count);
		    (*known_vals_ptr)[i] = cst;
		  }
		else if (inline_p && !es->param[i].change_prob)
		  {
		    /* error_mark_node marks a parameter that never
		       changes, a useful fact when inlining.  */
		    if (!known_vals_ptr->length ())
		      vec_safe_grow_cleared (known_vals_ptr, count);
		    (*known_vals_ptr)[i] = error_mark_node;
		  }

		/* If we failed to get simple constant, try value range.  */
		if ((!cst || TREE_CODE (cst) != INTEGER_CST)
		    && vrp_will_run_p (caller)
		    && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
		  {
		    value_range vr
		       = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
						     ipa_get_type (callee_pi,
								   i));
		    if (!vr.undefined_p () && !vr.varying_p ())
		      {
			if (!known_value_ranges.length ())
			  known_value_ranges.safe_grow_cleared (count);
			known_value_ranges[i] = vr;
		      }
		  }

		/* Determine known aggregate values.  */
		if (fre_will_run_p (caller))
		  {
		    ipa_agg_value_set agg
			= ipa_agg_value_set_from_jfunc (caller_parms_info,
							caller, &jf->agg);
		    if (agg.items.length ())
		      {
			if (!known_aggs_ptr->length ())
			  vec_safe_grow_cleared (known_aggs_ptr, count);
			(*known_aggs_ptr)[i] = agg;
		      }
		  }
	      }

	    /* For calls used in polymorphic calls we further determine
	       polymorphic call context.  */
	    if (known_contexts_ptr
		&& ipa_is_param_used_by_polymorphic_call (callee_pi, i))
	      {
		ipa_polymorphic_call_context
		   ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		if (!ctx.useless_p ())
		  {
		    if (!known_contexts_ptr->length ())
		      known_contexts_ptr->safe_grow_cleared (count);
		    (*known_contexts_ptr)[i]
		      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		  }
	       }
	  }
	else
	  /* No callee parameter info should only happen for thunks.  */
	  gcc_assert (!count || callee->thunk.thunk_p);
    }
  /* Fallback when no IPA parameter info is available: pick up constant
     arguments directly from the call statement.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    {
	      if (!known_vals_ptr->length ())
	        vec_safe_grow_cleared (known_vals_ptr, count);
	      (*known_vals_ptr)[i] = cst;
	    }
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      *known_vals_ptr,
				      known_value_ranges,
				      *known_aggs_ptr,
				      clause_ptr,
				      nonspec_clause_ptr);
}
697 
698 
699 /* Allocate the function summary. */
700 
701 static void
ipa_fn_summary_alloc(void)702 ipa_fn_summary_alloc (void)
703 {
704   gcc_checking_assert (!ipa_fn_summaries);
705   ipa_size_summaries = new ipa_size_summary_t (symtab);
706   ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
707   ipa_call_summaries = new ipa_call_summary_t (symtab);
708 }
709 
/* Release resources held by a call summary: its pool-allocated
   predicate and the per-parameter vector.  */
ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);

  param.release ();
}
717 
/* Release resources held by a function summary: pool-allocated hint
   predicates and the condition and size/time vectors.  */
ipa_fn_summary::~ipa_fn_summary ()
{
  if (loop_iterations)
    edge_predicate_pool.remove (loop_iterations);
  if (loop_stride)
    edge_predicate_pool.remove (loop_stride);
  vec_free (conds);
  vec_free (size_time_table);
  vec_free (call_size_time_table);
}
728 
729 void
remove_callees(cgraph_node * node)730 ipa_fn_summary_t::remove_callees (cgraph_node *node)
731 {
732   cgraph_edge *e;
733   for (e = node->callees; e; e = e->next_callee)
734     ipa_call_summaries->remove (e);
735   for (e = node->indirect_calls; e; e = e->next_callee)
736     ipa_call_summaries->remove (e);
737 }
738 
/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally care about allocating new memory slot for updated predicate
   and set it to NULL when it becomes true or false (and thus uninteresting).
 */

static void
remap_hint_predicate_after_duplication (predicate **p,
					clause_t possible_truths)
{
  predicate new_predicate;

  /* No hint predicate stored; nothing to remap.  */
  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free previous predicate; it is used by node origin.  */
  *p = NULL;
  /* set_hint_predicate allocates a fresh slot (or leaves NULL for a
     trivial predicate).  */
  set_hint_predicate (p, new_predicate);
}
758 
759 
/* Hook that is called by cgraph.c when a node is duplicated.  Copies the
   summary of SRC into the (preallocated, uninitialized) summary INFO of
   DST, simplifying predicates when DST is a specialized clone with known
   parameter replacements.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  /* INFO is raw storage; placement-new a copy of the source summary.  */
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      /* Keep the old table aside; a simplified one is rebuilt below.  */
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      class ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Collect the replacement values recorded for the clone.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Re-create the initial unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
         Simplify the predicate by pruning out alternatives that are known
         to be false.
         TODO: as on optimization, we can also eliminate conditions known
         to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
				 (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
		  		 (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
			             new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
         Also copy constantness arrays.   */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  /* Fetch NEXT first; edge_set_predicate may redirect EDGE.  */
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
         Also copy constantness arrays.   */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
				 (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
         non-trivial clones, we will get trouble with lack of information
         about updating self sizes, because size vectors already contains
         sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* Plain duplication: deep-copy the table and hint predicates so
	 the copy does not share pool slots with the origin.  */
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
    }
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}
902 
903 
904 /* Hook that is called by cgraph.c when a node is duplicated.  */
905 
906 void
duplicate(struct cgraph_edge * src,struct cgraph_edge * dst,class ipa_call_summary * srcinfo,class ipa_call_summary * info)907 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
908 			       struct cgraph_edge *dst,
909 			       class ipa_call_summary *srcinfo,
910 			       class ipa_call_summary *info)
911 {
912   new (info) ipa_call_summary (*srcinfo);
913   info->predicate = NULL;
914   edge_set_predicate (dst, srcinfo->predicate);
915   info->param = srcinfo->param.copy ();
916   if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
917     {
918       info->call_stmt_size -= (eni_size_weights.indirect_call_cost
919 			       - eni_size_weights.call_cost);
920       info->call_stmt_time -= (eni_time_weights.indirect_call_cost
921 			       - eni_time_weights.call_cost);
922     }
923 }
924 
925 /* Dump edge summaries associated to NODE and recursively to all clones.
926    Indent by INDENT.  */
927 
static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       class ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      /* Header line: callee name, inline status and edge frequency.  */
      fprintf (f,
	       "%*s%s %s\n%*s  freq:%4.2f",
	       indent, "", callee->dump_name (),
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
	       indent, "", edge->sreal_frequency ().to_double ());

      if (cross_module_call_p (edge))
	fprintf (f, " cross module");

      /* The call summary may be missing for edges never analyzed.  */
      if (es)
	fprintf (f, " loop depth:%2i size:%2i time: %2i",
		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      ipa_size_summary *ss = ipa_size_summaries->get (callee);
      if (s != NULL)
	fprintf (f, " callee size:%2i stack:%2i",
		 (int) (ss->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      /* NOTE(review): predicate::dump presumably terminates the line
	 itself; otherwise terminate it here.  */
      if (es && es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      /* Dump per-parameter change probabilities when recorded.  */
      if (es && es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      /* For edges already inlined, recurse into the callee's calls.  */
      if (!edge->inline_failed)
	{
	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
		   indent + 2, "",
		   (int) ipa_get_stack_frame_offset (callee),
		   (int) ss->estimated_self_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  /* Indirect calls are kept on a separate list; dump them too.  */
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
1007 
1008 
/* Dump the IPA function summary of NODE to file F.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  /* Only function definitions carry summaries.  */
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
	  fprintf (f, "  self size:       %i\n", ss->self_size);
	  fprintf (f, "  global size:     %i\n", ss->size);
	  fprintf (f, "  min size:       %i\n", s->min_size);
	  fprintf (f, "  self stack:      %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, "  estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, "  In SCC:          %i\n", (int) s->scc_no);
	  /* Dump the size/time table together with the predicates under
	     which each entry is live.  A trivially true predicate is not
	     printed.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, "    size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ",  executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ",  nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  /* Loop hint predicates, when known.  */
	  if (s->loop_iterations)
	    {
	      fprintf (f, "  loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, "  loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  fprintf (f, "  calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
1073 
/* Dump the IPA function summary of NODE to stderr; convenience wrapper
   intended to be called from a debugger.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
1079 
1080 void
ipa_dump_fn_summaries(FILE * f)1081 ipa_dump_fn_summaries (FILE *f)
1082 {
1083   struct cgraph_node *node;
1084 
1085   FOR_EACH_DEFINED_FUNCTION (node)
1086     if (!node->inlined_to)
1087       ipa_dump_fn_summary (f, node);
1088 }
1089 
1090 /* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
1091    boolean variable pointed to by DATA.  */
1092 
1093 static bool
mark_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef ATTRIBUTE_UNUSED,void * data)1094 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1095 	       void *data)
1096 {
1097   bool *b = (bool *) data;
1098   *b = true;
1099   return true;
1100 }
1101 
1102 /* If OP refers to value of function parameter, return the corresponding
1103    parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
1104    PARM_DECL) will be stored to *SIZE_P in that case too.  */
1105 
1106 static tree
unmodified_parm_1(ipa_func_body_info * fbi,gimple * stmt,tree op,poly_int64 * size_p)1107 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1108 		   poly_int64 *size_p)
1109 {
1110   /* SSA_NAME referring to parm default def?  */
1111   if (TREE_CODE (op) == SSA_NAME
1112       && SSA_NAME_IS_DEFAULT_DEF (op)
1113       && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1114     {
1115       if (size_p)
1116 	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1117       return SSA_NAME_VAR (op);
1118     }
1119   /* Non-SSA parm reference?  */
1120   if (TREE_CODE (op) == PARM_DECL)
1121     {
1122       bool modified = false;
1123 
1124       ao_ref refd;
1125       ao_ref_init (&refd, op);
1126       int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1127 				       mark_modified, &modified, NULL, NULL,
1128 				       fbi->aa_walk_budget + 1);
1129       if (walked < 0)
1130 	{
1131 	  fbi->aa_walk_budget = 0;
1132 	  return NULL_TREE;
1133 	}
1134       if (!modified)
1135 	{
1136 	  if (size_p)
1137 	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1138 	  return op;
1139 	}
1140     }
1141   return NULL_TREE;
1142 }
1143 
1144 /* If OP refers to value of function parameter, return the corresponding
1145    parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
1146    the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1147    stored to *SIZE_P in that case too.  */
1148 
1149 static tree
unmodified_parm(ipa_func_body_info * fbi,gimple * stmt,tree op,poly_int64 * size_p)1150 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1151 		 poly_int64 *size_p)
1152 {
1153   tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1154   if (res)
1155     return res;
1156 
1157   if (TREE_CODE (op) == SSA_NAME
1158       && !SSA_NAME_IS_DEFAULT_DEF (op)
1159       && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1160     return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1161 			    gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1162 			    size_p);
1163   return NULL_TREE;
1164 }
1165 
1166 /* If OP refers to a value of a function parameter or value loaded from an
1167    aggregate passed to a parameter (either by value or reference), return TRUE
1168    and store the number of the parameter to *INDEX_P, the access size into
1169    *SIZE_P, and information whether and how it has been loaded from an
1170    aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
1171    statement in which OP is used or loaded.  */
1172 
static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      /* OP is the parameter itself (SSA default def or an unmodified
	 PARM_DECL); translate the decl to its parameter index.  */
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      /* Look through a single-rhs SSA assignment.  If its rhs is not a
	 memory reference, retry the whole analysis on it; otherwise
	 fall through to the aggregate-load path below.  */
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference; check whether it is a load from an
     aggregate passed to a parameter (by value or by reference).  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
1209 
1210 /* See if statement might disappear after inlining.
1211    0 - means not eliminated
1212    1 - half of statements goes away
1213    2 - for sure it is eliminated.
1214    We are not terribly sophisticated, basically looking for simple abstraction
1215    penalty wrappers.  */
1216 
static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  /* Without optimization nothing is expected to be eliminated.  */
  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements always disappear when the body is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      /* Only simple single-rhs assignments are considered.  */
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
         and stores to return value or parameters are often free after
         inlining due to SRA and further combining.
         Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field. Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).   */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	     struct a a ={1,2};
	     return a;
	     }

	     This translate into:

	     returnstruct ()
	     {
	     int a$b;
	     int a$a;
	     struct a a;
	     struct a D.2739;

	     <bb 2>:
	     D.2739.a = 1;
	     D.2739.b = 2;
	     return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* An invariant or register rhs stored into a free lhs makes
	     the whole statement a candidate for elimination.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
1342 
1343 /* Analyze EXPR if it represents a series of simple operations performed on
1344    a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
1345    AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1346    Type of the parameter or load from an aggregate via the parameter is
1347    stored in *TYPE_P.  Operations on the parameter are recorded to
1348    PARAM_OPS_P if it is not NULL.  */
1349 
static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  /* Cap on the number of operations we are willing to record.  */
  int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      /* Success: EXPR resolved to a parameter or to an aggregate item
	 passed via a parameter.  */
      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      /* Otherwise EXPR must be a non-default SSA_NAME we can walk
	 through its defining assignment.  */
      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      /* Exactly one rhs operand may be non-constant; that operand is
	 the one to keep walking from.  */
      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      /* All operands constant: nothing depends on a parameter.  */
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      /* Record the operation at the front of the list.  */
      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1464 
1465 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1466    predicates to the CFG edges.   */
1467 
static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  /* Only conditionals comparing against an IP-invariant rhs can be
     turned into predicates.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      /* Attach to each outgoing edge the condition under which it is
	 taken (inverted for the false edge).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
			       	 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
     constant_code
     else
     nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  /* Predicate the false edge (the nonconstant path) only.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
		     		   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
1555 
1556 
1557 /* If BB ends by a switch we can turn into predicates, attach corresponding
1558    predicates to the CFG edges.   */
1559 
static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  /* The switch index must be expressible in terms of a parameter.  */
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  /* Set of merged case ranges, used to build the default predicate.  */
  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = opt_for_fn (fbi->node->decl,
				param_ipa_max_switch_predicate_bounds);
  int bound_count = 0;
  wide_int vr_wmin, vr_wmax;
  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);

  /* Start every outgoing edge with an impossible predicate; they get
     OR-ed with the per-case conditions below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  /* Label 0 is the default case; walk the real case labels.  */
  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      /* A case without CASE_HIGH matches the single value CASE_LOW.  */
      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
		           &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary,  params_summary,index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Too many bounds: give up and make the default edge unconditional.  */
  if (bound_count > bound_limit)
    {
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  /* Without a usable value range, fall back to the type's full range.  */
  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

                [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

              D2 < x < D3 && x != S1 && ... && x != Sn
   */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
		       		param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
			     	      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
			 	 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  /* Attach the accumulated predicate to the default edge.  */
  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}
1758 
1759 
1760 /* For each BB in NODE attach to its AUX pointer predicate under
1761    which it is executable.  */
1762 
static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to the individual outgoing edges of
     conditional and switch statements; the dataflow below combines
     those into per-BB predicates.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  /* A block executes under the union (OR) of the predicates of
	     its incoming edges; each edge contributes the source block's
	     predicate ANDed with the predicate attached to the edge
	     itself, if any.  */
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* TRUE absorbs everything; no other edge can change
		     the result.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  /* Block became reachable for the first time; allocate
		     its predicate lazily and iterate once more.  */
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
	          if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  /* As above, OR in the previous value to keep the
		     dataflow monotonous under predicate approximation.  */
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
1861 
1862 
1863 /* Return predicate specifying when the STMT might have result that is not
1864    a compile time constant.  */
1865 
static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Unary operations (such as conversions) preserve constantness of
     their operand; look through them.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  /* An unmodified parameter is non-constant exactly when the
     corresponding argument changes; express that as a "changed"
     condition on the parameter.  */
  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  predicate::changed, NULL_TREE);
  /* Invariants are compile time constants in every context.  */
  if (is_gimple_min_invariant (expr))
    return false;
  /* For SSA names the predicate has already been computed by the
     caller and recorded in NONCONSTANT_NAMES.  */
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* A binary operation or comparison may be non-constant whenever
	 either operand may be; combine recursively with OR.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      /* Likewise for ?:, combining all three operands.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    /* Calls are conservatively assumed to produce a non-constant.  */
    return true;
  else
    {
      /* All expression forms reaching this function are expected to be
	 handled above.  */
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
1937 
1938 
1939 /* Return predicate specifying when the STMT might have result that is not
1940    a compile time constant.  */
1941 
static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  /* TRUE is the conservative default: the result is never known to be
     a compile time constant.  */
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      /* Only loads expressible in terms of an unmodified parameter
	 (possibly an aggregate part of it) are understood.  */
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Build the resulting predicate as an OR over all operands: the
     statement may produce a non-constant whenever any input may be
     non-constant.  */
  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       predicate::changed, NULL_TREE);
	  else
	    /* The condition for the base of the load was already added
	       above; avoid duplicating it.  */
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the predicate for the SSA name this statement defines so
     later statements using it can look it up.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
2033 
/* Bookkeeping for the record_modified callback of walk_aliased_vdefs.  */

struct record_modified_bb_info
{
  /* The parameter value whose modifications are being tracked (used
     for dumping only).  */
  tree op;
  /* Indices of basic blocks where the value may be set.  */
  bitmap bb_set;
  /* The statement using the value; its own definition is ignored.  */
  gimple *stmt;
};
2040 
2041 /* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
2042    probability how often it changes between USE_BB.
2043    INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2044    is in different loop nest, we can do better.
2045    This is all just estimate.  In theory we look for minimal cut separating
2046    INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2047    anyway.  */
2048 
2049 static basic_block
get_minimal_bb(basic_block init_bb,basic_block use_bb)2050 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2051 {
2052   class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2053   if (l && l->header->count < init_bb->count)
2054     return l->header;
2055   return init_bb;
2056 }
2057 
2058 /* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
2059    set except for info->stmt.  */
2060 
2061 static bool
record_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef,void * data)2062 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2063 {
2064   struct record_modified_bb_info *info =
2065     (struct record_modified_bb_info *) data;
2066   if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2067     return false;
2068   if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2069     return false;
2070   bitmap_set_bit (info->bb_set,
2071 		  SSA_NAME_IS_DEFAULT_DEF (vdef)
2072 		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2073 		  : get_minimal_bb
2074 			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2075 			  gimple_bb (info->stmt))->index);
2076   if (dump_file)
2077     {
2078       fprintf (dump_file, "     Param ");
2079       print_generic_expr (dump_file, info->op, TDF_SLIM);
2080       fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2081 	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2082 	       get_minimal_bb
2083 			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2084 			  gimple_bb (info->stmt))->index);
2085       print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2086     }
2087   return false;
2088 }
2089 
2090 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2091    will change since last invocation of STMT.
2092 
2093    Value 0 is reserved for compile time invariants.
2094    For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
2095    ought to be REG_BR_PROB_BASE / estimated_iters.  */
2096 
static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  /* Look through WITH_SIZE_EXPR wrappers to the actual value.  */
  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do an conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      /* With a zero count at the use we cannot scale; be
	 conservative.  */
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      /* Scale to REG_BR_PROB_BASE, never returning 0 for a value
	 that is not an invariant.  */
      if (init_count < bb->count)
        return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* Memory with a known constant initializer never changes;
	 ctor_for_folding returns error_mark_node otherwise.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, "     Analyzing param change probability of ");
          print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      /* Collect the set of basic blocks where the memory may be
	 modified by walking the virtual use-def chain from the call,
	 bounded by the alias-analysis walk budget.  */
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  /* Budget exhausted or the value is set in the same block as
	     used: assume it always changes.  */
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, "     Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, "     Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
          fprintf (dump_file, "     Set with count ");
	  max.dump (dump_file);
          fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
          fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      /* As in the SSA case, scale to REG_BR_PROB_BASE with a minimum
	 of 1.  */
      if (max < bb->count)
        return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
2197 
2198 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2199    sub-graph and if the predicate the condition depends on is known.  If so,
2200    return true and store the pointer the predicate in *P.  */
2201 
2202 static bool
phi_result_unknown_predicate(ipa_func_body_info * fbi,ipa_fn_summary * summary,class ipa_node_params * params_summary,basic_block bb,predicate * p,vec<predicate> nonconstant_names)2203 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2204 			      ipa_fn_summary *summary,
2205 			      class ipa_node_params *params_summary,
2206 			      basic_block bb,
2207 			      predicate *p,
2208 			      vec<predicate> nonconstant_names)
2209 {
2210   edge e;
2211   edge_iterator ei;
2212   basic_block first_bb = NULL;
2213   gimple *stmt;
2214 
2215   if (single_pred_p (bb))
2216     {
2217       *p = false;
2218       return true;
2219     }
2220 
2221   FOR_EACH_EDGE (e, ei, bb->preds)
2222     {
2223       if (single_succ_p (e->src))
2224 	{
2225 	  if (!single_pred_p (e->src))
2226 	    return false;
2227 	  if (!first_bb)
2228 	    first_bb = single_pred (e->src);
2229 	  else if (single_pred (e->src) != first_bb)
2230 	    return false;
2231 	}
2232       else
2233 	{
2234 	  if (!first_bb)
2235 	    first_bb = e->src;
2236 	  else if (e->src != first_bb)
2237 	    return false;
2238 	}
2239     }
2240 
2241   if (!first_bb)
2242     return false;
2243 
2244   stmt = last_stmt (first_bb);
2245   if (!stmt
2246       || gimple_code (stmt) != GIMPLE_COND
2247       || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2248     return false;
2249 
2250   *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2251 					   gimple_cond_lhs (stmt),
2252 					   nonconstant_names);
2253   if (*p == true)
2254     return false;
2255   else
2256     return true;
2257 }
2258 
2259 /* Given a PHI statement in a function described by inline properties SUMMARY
2260    and *P being the predicate describing whether the selected PHI argument is
2261    known, store a predicate for the result of the PHI statement into
2262    NONCONSTANT_NAMES, if possible.  */
2263 
2264 static void
predicate_for_phi_result(class ipa_fn_summary * summary,gphi * phi,predicate * p,vec<predicate> nonconstant_names)2265 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2266 			  predicate *p,
2267 			  vec<predicate> nonconstant_names)
2268 {
2269   unsigned i;
2270 
2271   for (i = 0; i < gimple_phi_num_args (phi); i++)
2272     {
2273       tree arg = gimple_phi_arg (phi, i)->def;
2274       if (!is_gimple_min_invariant (arg))
2275 	{
2276 	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
2277 	  *p = p->or_with (summary->conds,
2278 			   nonconstant_names[SSA_NAME_VERSION (arg)]);
2279 	  if (*p == true)
2280 	    return;
2281 	}
2282     }
2283 
2284   if (dump_file && (dump_flags & TDF_DETAILS))
2285     {
2286       fprintf (dump_file, "\t\tphi predicate: ");
2287       p->dump (dump_file, summary->conds);
2288     }
2289   nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2290 }
2291 
2292 /* For a typical usage of __builtin_expect (a<b, 1), we
2293    may introduce an extra relation stmt:
2294    With the builtin, we have
2295      t1 = a <= b;
2296      t2 = (long int) t1;
2297      t3 = __builtin_expect (t2, 1);
2298      if (t3 != 0)
2299        goto ...
2300    Without the builtin, we have
2301      if (a<=b)
2302        goto...
2303    This affects the size/time estimation and may have
2304    an impact on the earlier inlining.
2305    Here find this pattern and fix it up later.  */
2306 
static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
        {
          tree var = gimple_call_lhs (stmt);
          tree arg = gimple_call_arg (stmt, 0);
          use_operand_p use_p;
	  gimple *use_stmt;
          bool match = false;
          bool done = false;

          if (!var || !arg)
            continue;
          gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk the SSA definition chain of the argument through
	     conversions, looking for the comparison that feeds the
	     __builtin_expect call.  */
          while (TREE_CODE (arg) == SSA_NAME)
            {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
              if (!is_gimple_assign (stmt_tmp))
                break;
              switch (gimple_assign_rhs_code (stmt_tmp))
                {
                  case LT_EXPR:
                  case LE_EXPR:
                  case GT_EXPR:
                  case GE_EXPR:
                  case EQ_EXPR:
                  case NE_EXPR:
		    /* Found the comparison; pattern matches.  */
                    match = true;
                    done = true;
                    break;
                  CASE_CONVERT:
		    /* Conversions are looked through.  */
                    break;
                  default:
		    /* Anything else ends the walk without a match.  */
                    done = true;
                    break;
                }
              if (done)
                break;
              arg = gimple_assign_rhs1 (stmt_tmp);
            }

	  /* The __builtin_expect result must feed exactly one
	     GIMPLE_COND; that conditional is the statement that will be
	     folded later.  */
          if (match && single_imm_use (var, &use_p, &use_stmt)
              && gimple_code (use_stmt) == GIMPLE_COND)
            return use_stmt;
        }
    }
  return NULL;
}
2364 
2365 /* Return true when the basic blocks contains only clobbers followed by RESX.
2366    Such BBs are kept around to make removal of dead stores possible with
2367    presence of EH and will be optimized out by optimize_clobbers later in the
2368    game.
2369 
2370    NEED_EH is used to recurse in case the clobber has non-EH predecessors
2371    that can be clobber only, too.. When it is false, the RESX is not necessary
2372    on the end of basic block.  */
2373 
static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The block must end with a RESX; scan backwards from there.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
        return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Everything else in the block must be a debug statement, a clobber,
     or a leading label.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      /* Labels come first in a block, so reaching one means the whole
	 body has been scanned.  */
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2412 
2413 /* Return true if STMT compute a floating point expression that may be affected
2414    by -ffast-math and similar flags.  */
2415 
2416 static bool
fp_expression_p(gimple * stmt)2417 fp_expression_p (gimple *stmt)
2418 {
2419   ssa_op_iter i;
2420   tree op;
2421 
2422   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2423     if (FLOAT_TYPE_P (TREE_TYPE (op)))
2424       return true;
2425   return false;
2426 }
2427 
2428 /* Analyze function body for NODE.
2429    EARLY indicates run from early optimization pipeline.  */
2430 
2431 static void
analyze_function_body(struct cgraph_node * node,bool early)2432 analyze_function_body (struct cgraph_node *node, bool early)
2433 {
2434   sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2435   /* Estimate static overhead for function prologue/epilogue and alignment. */
2436   int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2437   /* Benefits are scaled by probability of elimination that is in range
2438      <0,2>.  */
2439   basic_block bb;
2440   struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2441   sreal freq;
2442   class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2443   class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2444   predicate bb_predicate;
2445   struct ipa_func_body_info fbi;
2446   vec<predicate> nonconstant_names = vNULL;
2447   int nblocks, n;
2448   int *order;
2449   gimple *fix_builtin_expect_stmt;
2450 
2451   gcc_assert (my_function && my_function->cfg);
2452   gcc_assert (cfun == my_function);
2453 
2454   memset(&fbi, 0, sizeof(fbi));
2455   vec_free (info->conds);
2456   info->conds = NULL;
2457   vec_free (info->size_time_table);
2458   info->size_time_table = NULL;
2459 
2460   /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2461      so we can produce proper inline hints.
2462 
2463      When optimizing and analyzing for early inliner, initialize node params
2464      so we can produce correct BB predicates.  */
2465 
2466   if (opt_for_fn (node->decl, optimize))
2467     {
2468       calculate_dominance_info (CDI_DOMINATORS);
2469       calculate_dominance_info (CDI_POST_DOMINATORS);
2470       if (!early)
2471         loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2472       else
2473 	{
2474 	  ipa_check_create_node_params ();
2475 	  ipa_initialize_node_params (node);
2476 	}
2477 
2478       if (ipa_node_params_sum)
2479 	{
2480 	  fbi.node = node;
2481 	  fbi.info = IPA_NODE_REF (node);
2482 	  fbi.bb_infos = vNULL;
2483 	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2484 	  fbi.param_count = count_formal_params (node->decl);
2485 	  fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2486 
2487 	  nonconstant_names.safe_grow_cleared
2488 	    (SSANAMES (my_function)->length ());
2489 	}
2490     }
2491 
2492   if (dump_file)
2493     fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2494 	     node->dump_name ());
2495 
2496   /* When we run into maximal number of entries, we assign everything to the
2497      constant truth case.  Be sure to have it in list. */
2498   bb_predicate = true;
2499   info->account_size_time (0, 0, bb_predicate, bb_predicate);
2500 
2501   bb_predicate = predicate::not_inlined ();
2502   info->account_size_time (opt_for_fn (node->decl,
2503 				param_uninlined_function_insns)
2504 			   * ipa_fn_summary::size_scale,
2505 			   opt_for_fn (node->decl,
2506 				param_uninlined_function_time),
2507 			   bb_predicate,
2508 		           bb_predicate);
2509 
2510   if (fbi.info)
2511     compute_bb_predicates (&fbi, node, info, params_summary);
2512   order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2513   nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2514   for (n = 0; n < nblocks; n++)
2515     {
2516       bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2517       freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2518       if (clobber_only_eh_bb_p (bb))
2519 	{
2520 	  if (dump_file && (dump_flags & TDF_DETAILS))
2521 	    fprintf (dump_file, "\n Ignoring BB %i;"
2522 		     " it will be optimized away by cleanup_clobbers\n",
2523 		     bb->index);
2524 	  continue;
2525 	}
2526 
2527       /* TODO: Obviously predicates can be propagated down across CFG.  */
2528       if (fbi.info)
2529 	{
2530 	  if (bb->aux)
2531 	    bb_predicate = *(predicate *) bb->aux;
2532 	  else
2533 	    bb_predicate = false;
2534 	}
2535       else
2536 	bb_predicate = true;
2537 
2538       if (dump_file && (dump_flags & TDF_DETAILS))
2539 	{
2540 	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
2541 	  bb_predicate.dump (dump_file, info->conds);
2542 	}
2543 
2544       if (fbi.info && nonconstant_names.exists ())
2545 	{
2546 	  predicate phi_predicate;
2547 	  bool first_phi = true;
2548 
2549 	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2550 	       gsi_next (&bsi))
2551 	    {
2552 	      if (first_phi
2553 		  && !phi_result_unknown_predicate (&fbi, info,
2554 			  			    params_summary,
2555 			 			    bb,
2556 						    &phi_predicate,
2557 						    nonconstant_names))
2558 		break;
2559 	      first_phi = false;
2560 	      if (dump_file && (dump_flags & TDF_DETAILS))
2561 		{
2562 		  fprintf (dump_file, "  ");
2563 		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2564 		}
2565 	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2566 					nonconstant_names);
2567 	    }
2568 	}
2569 
2570       fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2571 
2572       for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2573 	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2574 	{
2575 	  gimple *stmt = gsi_stmt (bsi);
2576 	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
2577 	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
2578 	  int prob;
2579 	  predicate will_be_nonconstant;
2580 
2581           /* This relation stmt should be folded after we remove
2582              __builtin_expect call. Adjust the cost here.  */
2583 	  if (stmt == fix_builtin_expect_stmt)
2584             {
2585               this_size--;
2586               this_time--;
2587             }
2588 
2589 	  if (dump_file && (dump_flags & TDF_DETAILS))
2590 	    {
2591 	      fprintf (dump_file, "  ");
2592 	      print_gimple_stmt (dump_file, stmt, 0);
2593 	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2594 		       freq.to_double (), this_size,
2595 		       this_time);
2596 	    }
2597 
2598 	  if (is_gimple_call (stmt)
2599 	      && !gimple_call_internal_p (stmt))
2600 	    {
2601 	      struct cgraph_edge *edge = node->get_edge (stmt);
2602 	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2603 
2604 	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
2605 	         resolved as constant.  We however don't want to optimize
2606 	         out the cgraph edges.  */
2607 	      if (nonconstant_names.exists ()
2608 		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2609 		  && gimple_call_lhs (stmt)
2610 		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2611 		{
2612 		  predicate false_p = false;
2613 		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2614 		    = false_p;
2615 		}
2616 	      if (ipa_node_params_sum)
2617 		{
2618 		  int count = gimple_call_num_args (stmt);
2619 		  int i;
2620 
2621 		  if (count)
2622 		    es->param.safe_grow_cleared (count);
2623 		  for (i = 0; i < count; i++)
2624 		    {
2625 		      int prob = param_change_prob (&fbi, stmt, i);
2626 		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2627 		      es->param[i].change_prob = prob;
2628 		    }
2629 		}
2630 
2631 	      es->call_stmt_size = this_size;
2632 	      es->call_stmt_time = this_time;
2633 	      es->loop_depth = bb_loop_depth (bb);
2634 	      edge_set_predicate (edge, &bb_predicate);
2635 	      if (edge->speculative)
2636 		{
2637 		  cgraph_edge *indirect
2638 			= edge->speculative_call_indirect_edge ();
2639 	          ipa_call_summary *es2
2640 			 = ipa_call_summaries->get_create (indirect);
2641 		  ipa_call_summaries->duplicate (edge, indirect,
2642 						 es, es2);
2643 
2644 		  /* Edge is the first direct call.
2645 		     create and duplicate call summaries for multiple
2646 		     speculative call targets.  */
2647 		  for (cgraph_edge *direct
2648 			 = edge->next_speculative_call_target ();
2649 		       direct;
2650 		       direct = direct->next_speculative_call_target ())
2651 		    {
2652 		      ipa_call_summary *es3
2653 			= ipa_call_summaries->get_create (direct);
2654 		      ipa_call_summaries->duplicate (edge, direct,
2655 						     es, es3);
2656 		    }
2657 		}
2658 	    }
2659 
2660 	  /* TODO: When conditional jump or switch is known to be constant, but
2661 	     we did not translate it into the predicates, we really can account
2662 	     just maximum of the possible paths.  */
2663 	  if (fbi.info)
2664 	    will_be_nonconstant
2665 	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
2666 					       stmt, nonconstant_names);
2667 	  else
2668 	    will_be_nonconstant = true;
2669 	  if (this_time || this_size)
2670 	    {
2671 	      sreal final_time = (sreal)this_time * freq;
2672 
2673 	      prob = eliminated_by_inlining_prob (&fbi, stmt);
2674 	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2675 		fprintf (dump_file,
2676 			 "\t\t50%% will be eliminated by inlining\n");
2677 	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2678 		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2679 
2680 	      class predicate p = bb_predicate & will_be_nonconstant;
2681 
2682 	      /* We can ignore statement when we proved it is never going
2683 		 to happen, but we cannot do that for call statements
2684 		 because edges are accounted specially.  */
2685 
2686 	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2687 		{
2688 		  time += final_time;
2689 		  size += this_size;
2690 		}
2691 
2692 	      /* We account everything but the calls.  Calls have their own
2693 	         size/time info attached to cgraph edges.  This is necessary
2694 	         in order to make the cost disappear after inlining.  */
2695 	      if (!is_gimple_call (stmt))
2696 		{
2697 		  if (prob)
2698 		    {
2699 		      predicate ip = bb_predicate & predicate::not_inlined ();
2700 		      info->account_size_time (this_size * prob,
2701 					       (final_time * prob) / 2, ip,
2702 					       p);
2703 		    }
2704 		  if (prob != 2)
2705 		    info->account_size_time (this_size * (2 - prob),
2706 					     (final_time * (2 - prob) / 2),
2707 					     bb_predicate,
2708 					     p);
2709 		}
2710 
2711 	      if (!info->fp_expressions && fp_expression_p (stmt))
2712 		{
2713 		  info->fp_expressions = true;
2714 		  if (dump_file)
2715 		    fprintf (dump_file, "   fp_expression set\n");
2716 		}
2717 	    }
2718 
2719 	  /* Account cost of address calculations in the statements.  */
2720 	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2721 	    {
2722 	      for (tree op = gimple_op (stmt, i);
2723 		   op && handled_component_p (op);
2724 		   op = TREE_OPERAND (op, 0))
2725 	        if ((TREE_CODE (op) == ARRAY_REF
2726 		     || TREE_CODE (op) == ARRAY_RANGE_REF)
2727 		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2728 		  {
2729 		    predicate p = bb_predicate;
2730 		    if (fbi.info)
2731 		      p = p & will_be_nonconstant_expr_predicate
2732 				 (&fbi, info, params_summary,
2733 				  TREE_OPERAND (op, 1),
2734 			          nonconstant_names);
2735 		    if (p != false)
2736 		      {
2737 			time += freq;
2738 			size += 1;
2739 			if (dump_file)
2740 			  fprintf (dump_file,
2741 				   "\t\tAccounting address calculation.\n");
2742 			info->account_size_time (ipa_fn_summary::size_scale,
2743 						 freq,
2744 						 bb_predicate,
2745 						 p);
2746 		      }
2747 		  }
2748 	    }
2749 
2750 	}
2751     }
2752   free (order);
2753 
2754   if (nonconstant_names.exists () && !early)
2755     {
2756       class loop *loop;
2757       predicate loop_iterations = true;
2758       predicate loop_stride = true;
2759 
2760       if (dump_file && (dump_flags & TDF_DETAILS))
2761 	flow_loops_dump (dump_file, NULL, 0);
2762       scev_initialize ();
2763       FOR_EACH_LOOP (loop, 0)
2764 	{
2765 	  vec<edge> exits;
2766 	  edge ex;
2767 	  unsigned int j;
2768 	  class tree_niter_desc niter_desc;
2769 	  if (loop->header->aux)
2770 	    bb_predicate = *(predicate *) loop->header->aux;
2771 	  else
2772 	    bb_predicate = false;
2773 
2774 	  exits = get_loop_exit_edges (loop);
2775 	  FOR_EACH_VEC_ELT (exits, j, ex)
2776 	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2777 		&& !is_gimple_min_invariant (niter_desc.niter))
2778 	    {
2779 	      predicate will_be_nonconstant
2780 		= will_be_nonconstant_expr_predicate (&fbi, info,
2781 						      params_summary,
2782 						      niter_desc.niter,
2783 						      nonconstant_names);
2784 	      if (will_be_nonconstant != true)
2785 		will_be_nonconstant = bb_predicate & will_be_nonconstant;
2786 	      if (will_be_nonconstant != true
2787 		  && will_be_nonconstant != false)
2788 		/* This is slightly inprecise.  We may want to represent each
2789 		   loop with independent predicate.  */
2790 		loop_iterations &= will_be_nonconstant;
2791 	    }
2792 	  exits.release ();
2793 	}
2794 
2795       /* To avoid quadratic behavior we analyze stride predicates only
2796          with respect to the containing loop.  Thus we simply iterate
2797 	 over all defs in the outermost loop body.  */
2798       for (loop = loops_for_fn (cfun)->tree_root->inner;
2799 	   loop != NULL; loop = loop->next)
2800 	{
2801 	  basic_block *body = get_loop_body (loop);
2802 	  for (unsigned i = 0; i < loop->num_nodes; i++)
2803 	    {
2804 	      gimple_stmt_iterator gsi;
2805 	      if (body[i]->aux)
2806 		bb_predicate = *(predicate *) body[i]->aux;
2807 	      else
2808 		bb_predicate = false;
2809 	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2810 		   gsi_next (&gsi))
2811 		{
2812 		  gimple *stmt = gsi_stmt (gsi);
2813 
2814 		  if (!is_gimple_assign (stmt))
2815 		    continue;
2816 
2817 		  tree def = gimple_assign_lhs (stmt);
2818 		  if (TREE_CODE (def) != SSA_NAME)
2819 		    continue;
2820 
2821 		  affine_iv iv;
2822 		  if (!simple_iv (loop_containing_stmt (stmt),
2823 				  loop_containing_stmt (stmt),
2824 				  def, &iv, true)
2825 		      || is_gimple_min_invariant (iv.step))
2826 		    continue;
2827 
2828 		  predicate will_be_nonconstant
2829 		    = will_be_nonconstant_expr_predicate (&fbi, info,
2830 				    			  params_summary,
2831 				   			  iv.step,
2832 							  nonconstant_names);
2833 		  if (will_be_nonconstant != true)
2834 		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
2835 		  if (will_be_nonconstant != true
2836 		      && will_be_nonconstant != false)
2837 		    /* This is slightly inprecise.  We may want to represent
2838 		       each loop with independent predicate.  */
2839 		    loop_stride = loop_stride & will_be_nonconstant;
2840 		}
2841 	    }
2842 	  free (body);
2843 	}
2844       ipa_fn_summary *s = ipa_fn_summaries->get (node);
2845       set_hint_predicate (&s->loop_iterations, loop_iterations);
2846       set_hint_predicate (&s->loop_stride, loop_stride);
2847       scev_finalize ();
2848     }
2849   FOR_ALL_BB_FN (bb, my_function)
2850     {
2851       edge e;
2852       edge_iterator ei;
2853 
2854       if (bb->aux)
2855 	edge_predicate_pool.remove ((predicate *)bb->aux);
2856       bb->aux = NULL;
2857       FOR_EACH_EDGE (e, ei, bb->succs)
2858 	{
2859 	  if (e->aux)
2860 	    edge_predicate_pool.remove ((predicate *) e->aux);
2861 	  e->aux = NULL;
2862 	}
2863     }
2864   ipa_fn_summary *s = ipa_fn_summaries->get (node);
2865   ipa_size_summary *ss = ipa_size_summaries->get (node);
2866   s->time = time;
2867   ss->self_size = size;
2868   nonconstant_names.release ();
2869   ipa_release_body_info (&fbi);
2870   if (opt_for_fn (node->decl, optimize))
2871     {
2872       if (!early)
2873         loop_optimizer_finalize ();
2874       else if (!ipa_edge_args_sum)
2875 	ipa_free_all_node_params ();
2876       free_dominance_info (CDI_DOMINATORS);
2877       free_dominance_info (CDI_POST_DOMINATORS);
2878     }
2879   if (dump_file)
2880     {
2881       fprintf (dump_file, "\n");
2882       ipa_dump_fn_summary (dump_file, node);
2883     }
2884 }
2885 
2886 
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  /* Summaries are kept for offline bodies only; bodies already inlined
     are represented inside the summary of the node they were inlined
     to.  */
  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  Drop any stale summary (including the
     summaries of outgoing call edges) first so get_create below returns
     freshly initialized data.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no gimple body to analyze; model them as a single
	 call to the target plus a user-tunable overhead.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      /* Extra size is charged only when the thunk stays out of line.  */
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  /* Variadic thunks cannot be inlined.  */
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
        info->inlinable = true;
    }
  else
    {
       /* Even is_gimple_min_invariant rely on current_function_decl.  */
       push_cfun (DECL_STRUCT_FUNCTION (node->decl));

       /* During IPA profile merging we may be called w/o virtual SSA form
	  built.  */
       update_ssa (TODO_update_ssa_only_virtuals);

       /* Can this function be inlined at all?  */
       if (!opt_for_fn (node->decl, optimize)
	   && !lookup_attribute ("always_inline",
				 DECL_ATTRIBUTES (node->decl)))
	 info->inlinable = false;
       else
	 info->inlinable = tree_inlinable_function_p (node->decl);

       /* Type attributes can use parameter indices to describe them.  */
       if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	   /* Likewise for #pragma omp declare simd functions or functions
	      with simd attribute.  */
	   || lookup_attribute ("omp declare simd",
				DECL_ATTRIBUTES (node->decl)))
	 node->can_change_signature = false;
       else
	 {
	   /* Otherwise, inlinable functions always can change signature.  */
	   if (info->inlinable)
	     node->can_change_signature = true;
	   else
	     {
	       /* Functions calling builtin_apply cannot change signature.  */
	       for (e = node->callees; e; e = e->next_callee)
		 {
		   tree cdecl = e->callee->decl;
		   if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		       || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		     break;
		 }
	       /* E is non-NULL here exactly when the loop above found
		  a call to one of the builtins.  */
	       node->can_change_signature = !e;
	     }
	 }
       analyze_function_body (node, early);
       pop_cfun ();
     }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary except for case when speculative
     edges are present since these are accounted to size but not
     self_size. Do not compare time since different order the roundoff
     errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  if (flag_checking)
    {
      for (e = node->indirect_calls; e; e = e->next_callee)
       if (e->speculative)
	 break;
      gcc_assert (e || size_info->size == size_info->self_size);
    }
}
3002 
3003 
3004 /* Compute parameters of functions used by inliner using
3005    current_function_decl.  */
3006 
3007 static unsigned int
compute_fn_summary_for_current(void)3008 compute_fn_summary_for_current (void)
3009 {
3010   compute_fn_summary (cgraph_node::get (current_function_decl), true);
3011   return 0;
3012 }
3013 
3014 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
3015    KNOWN_CONTEXTS and KNOWN_AGGS.  */
3016 
3017 static bool
estimate_edge_devirt_benefit(struct cgraph_edge * ie,int * size,int * time,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs)3018 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3019 			      int *size, int *time,
3020 			      vec<tree> known_vals,
3021 			      vec<ipa_polymorphic_call_context> known_contexts,
3022 			      vec<ipa_agg_value_set> known_aggs)
3023 {
3024   tree target;
3025   struct cgraph_node *callee;
3026   class ipa_fn_summary *isummary;
3027   enum availability avail;
3028   bool speculative;
3029 
3030   if (!known_vals.length () && !known_contexts.length ())
3031     return false;
3032   if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3033     return false;
3034 
3035   target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
3036 					 known_aggs, &speculative);
3037   if (!target || speculative)
3038     return false;
3039 
3040   /* Account for difference in cost between indirect and direct calls.  */
3041   *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3042   *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3043   gcc_checking_assert (*time >= 0);
3044   gcc_checking_assert (*size >= 0);
3045 
3046   callee = cgraph_node::get (target);
3047   if (!callee || !callee->definition)
3048     return false;
3049   callee = callee->function_symbol (&avail);
3050   if (avail < AVAIL_AVAILABLE)
3051     return false;
3052   isummary = ipa_fn_summaries->get (callee);
3053   if (isummary == NULL)
3054     return false;
3055 
3056   return isummary->inlinable;
3057 }
3058 
3059 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3060    handle edge E with probability PROB.
3061    Set HINTS if edge may be devirtualized.
3062    KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
3063    site.  */
3064 
3065 static inline void
estimate_edge_size_and_time(struct cgraph_edge * e,int * size,int * min_size,sreal * time,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,ipa_hints * hints)3066 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3067 			     sreal *time,
3068 			     vec<tree> known_vals,
3069 			     vec<ipa_polymorphic_call_context> known_contexts,
3070 			     vec<ipa_agg_value_set> known_aggs,
3071 			     ipa_hints *hints)
3072 {
3073   class ipa_call_summary *es = ipa_call_summaries->get (e);
3074   int call_size = es->call_stmt_size;
3075   int call_time = es->call_stmt_time;
3076   int cur_size;
3077 
3078   if (!e->callee && hints && e->maybe_hot_p ()
3079       && estimate_edge_devirt_benefit (e, &call_size, &call_time,
3080 				       known_vals, known_contexts, known_aggs))
3081     *hints |= INLINE_HINT_indirect_call;
3082   cur_size = call_size * ipa_fn_summary::size_scale;
3083   *size += cur_size;
3084   if (min_size)
3085     *min_size += cur_size;
3086   if (time)
3087     *time += ((sreal)call_time) * e->sreal_frequency ();
3088 }
3089 
3090 
3091 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3092    calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3093    describe context of the call site.
3094 
3095    Helper for estimate_calls_size_and_time which does the same but
3096    (in most cases) faster.  */
3097 
3098 static void
estimate_calls_size_and_time_1(struct cgraph_node * node,int * size,int * min_size,sreal * time,ipa_hints * hints,clause_t possible_truths,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs)3099 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3100 			        int *min_size, sreal *time,
3101 			        ipa_hints *hints,
3102 			        clause_t possible_truths,
3103 			        vec<tree> known_vals,
3104 			        vec<ipa_polymorphic_call_context> known_contexts,
3105 			        vec<ipa_agg_value_set> known_aggs)
3106 {
3107   struct cgraph_edge *e;
3108   for (e = node->callees; e; e = e->next_callee)
3109     {
3110       if (!e->inline_failed)
3111 	{
3112 	  gcc_checking_assert (!ipa_call_summaries->get (e));
3113 	  estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3114 					  hints,
3115 					  possible_truths,
3116 					  known_vals, known_contexts,
3117 					  known_aggs);
3118 	  continue;
3119 	}
3120       class ipa_call_summary *es = ipa_call_summaries->get (e);
3121 
3122       /* Do not care about zero sized builtins.  */
3123       if (!es->call_stmt_size)
3124 	{
3125 	  gcc_checking_assert (!es->call_stmt_time);
3126 	  continue;
3127 	}
3128       if (!es->predicate
3129 	  || es->predicate->evaluate (possible_truths))
3130 	{
3131 	  /* Predicates of calls shall not use NOT_CHANGED codes,
3132 	     so we do not need to compute probabilities.  */
3133 	  estimate_edge_size_and_time (e, size,
3134 				       es->predicate ? NULL : min_size,
3135 				       time,
3136 				       known_vals, known_contexts,
3137 				       known_aggs, hints);
3138 	}
3139     }
3140   for (e = node->indirect_calls; e; e = e->next_callee)
3141     {
3142       class ipa_call_summary *es = ipa_call_summaries->get (e);
3143       if (!es->predicate
3144 	  || es->predicate->evaluate (possible_truths))
3145 	estimate_edge_size_and_time (e, size,
3146 				     es->predicate ? NULL : min_size,
3147 				     time,
3148 				     known_vals, known_contexts, known_aggs,
3149 				     hints);
3150     }
3151 }
3152 
3153 /* Populate sum->call_size_time_table for edges from NODE.  */
3154 
3155 static void
summarize_calls_size_and_time(struct cgraph_node * node,ipa_fn_summary * sum)3156 summarize_calls_size_and_time (struct cgraph_node *node,
3157     			       ipa_fn_summary *sum)
3158 {
3159   struct cgraph_edge *e;
3160   for (e = node->callees; e; e = e->next_callee)
3161     {
3162       if (!e->inline_failed)
3163 	{
3164 	  gcc_checking_assert (!ipa_call_summaries->get (e));
3165 	  summarize_calls_size_and_time (e->callee, sum);
3166 	  continue;
3167 	}
3168       int size = 0;
3169       sreal time = 0;
3170 
3171       estimate_edge_size_and_time (e, &size, NULL, &time,
3172 				   vNULL, vNULL, vNULL, NULL);
3173 
3174       struct predicate pred = true;
3175       class ipa_call_summary *es = ipa_call_summaries->get (e);
3176 
3177       if (es->predicate)
3178 	pred = *es->predicate;
3179       sum->account_size_time (size, time, pred, pred, true);
3180     }
3181   for (e = node->indirect_calls; e; e = e->next_callee)
3182     {
3183       int size = 0;
3184       sreal time = 0;
3185 
3186       estimate_edge_size_and_time (e, &size, NULL, &time,
3187 				   vNULL, vNULL, vNULL, NULL);
3188       struct predicate pred = true;
3189       class ipa_call_summary *es = ipa_call_summaries->get (e);
3190 
3191       if (es->predicate)
3192 	pred = *es->predicate;
3193       sum->account_size_time (size, time, pred, pred, true);
3194     }
3195 }
3196 
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   describe context of the call site.  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      vec<tree> known_vals,
			      vec<ipa_polymorphic_call_context> known_contexts,
			      vec<ipa_agg_value_set> known_aggs)
{
  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
  bool use_table = true;

  gcc_assert (node->callees || node->indirect_calls);

  /* During early inlining we do not calculate info for very
     large functions and thus there is no need for producing
     summaries.  */
  if (!ipa_node_params_sum)
    use_table = false;
  /* Do not calculate summaries for simple wrappers; it is waste
     of memory.  */
  /* NOTE(review): testing NODE->indirect_calls rather than
     !NODE->indirect_calls looks inconsistent with the "simple wrapper"
     description above — confirm intent.  Both paths compute the same
     result, so this only affects memory/compile time, not output.  */
  else if (node->callees && node->indirect_calls
           && node->callees->inline_failed && !node->callees->next_callee)
    use_table = false;
  /* If there is an indirect edge that may be optimized, we need
     to go the slow way.  */
  else if ((known_vals.length ()
     	    || known_contexts.length ()
	    || known_aggs.length ()) && hints)
    {
      class ipa_node_params *params_summary = IPA_NODE_REF (node);
      unsigned int nargs = params_summary
			   ? ipa_get_param_count (params_summary) : 0;

      /* The cached table cannot represent devirtualization benefits,
	 so bail out once any known argument can affect an indirect or
	 polymorphic call.  */
      for (unsigned int i = 0; i < nargs && use_table; i++)
	{
	  if (ipa_is_param_used_by_indirect_call (params_summary, i)
	      && ((known_vals.length () > i && known_vals[i])
		  || (known_aggs.length () > i
		      && known_aggs[i].items.length ())))
	    use_table = false;
	  else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
		   && (known_contexts.length () > i
		       && !known_contexts[i].useless_p ()))
	    use_table = false;
	}
    }

  /* Fast path is via the call size time table.  */
  if (use_table)
    {
      /* Build summary if it is absent.  */
      if (!sum->call_size_time_table)
	{
	  /* Entry 0 is the unconditional cost, consumed by the
	     min_size update below.  */
	  predicate true_pred = true;
	  sum->account_size_time (0, 0, true_pred, true_pred, true);
	  summarize_calls_size_and_time (node, sum);
	}

      int old_size = *size;
      sreal old_time = time ? *time : 0;

      if (min_size)
	*min_size += (*sum->call_size_time_table)[0].size;

      unsigned int i;
      size_time_entry *e;

      /* Walk the table and account sizes and times.  */
      for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
	   i++)
	if (e->exec_predicate.evaluate (possible_truths))
	  {
	    *size += e->size;
	    if (time)
	      *time += e->time;
	  }

      /* Be careful and see if both methods agree.  */
      if ((flag_checking || dump_file)
	  /* Do not try to sanity check when we know we lost some
	     precision.  */
	  && sum->call_size_time_table->length ()
	     < ipa_fn_summary::max_size_time_table_size)
	{
	  estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
					  possible_truths, known_vals,
					  known_contexts, known_aggs);
	  /* Sizes must agree exactly; times may drift by rounding.  */
	  gcc_assert (*size == old_size);
	  if (time && (*time - old_time > 1 || *time - old_time < -1)
	      && dump_file)
	    fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
		     old_time.to_double (),
		     time->to_double ());
	}
    }
  /* Slow path by walking all edges.  */
  else
    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
				    possible_truths, known_vals, known_contexts,
				    known_aggs);
}
3303 
3304 /* Default constructor for ipa call context.
3305    Memory allocation of known_vals, known_contexts
3306    and known_aggs vectors is owned by the caller, but can
3307    be release by ipa_call_context::release.
3308 
3309    inline_param_summary is owned by the caller.  */
ipa_call_context(cgraph_node * node,clause_t possible_truths,clause_t nonspec_possible_truths,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,vec<inline_param_summary> inline_param_summary)3310 ipa_call_context::ipa_call_context (cgraph_node *node,
3311 				    clause_t possible_truths,
3312 				    clause_t nonspec_possible_truths,
3313 				    vec<tree> known_vals,
3314 				    vec<ipa_polymorphic_call_context>
3315 				   	 known_contexts,
3316 				    vec<ipa_agg_value_set> known_aggs,
3317 				    vec<inline_param_summary>
3318 				   	 inline_param_summary)
3319 : m_node (node), m_possible_truths (possible_truths),
3320   m_nonspec_possible_truths (nonspec_possible_truths),
3321   m_inline_param_summary (inline_param_summary),
3322   m_known_vals (known_vals),
3323   m_known_contexts (known_contexts),
3324   m_known_aggs (known_aggs)
3325 {
3326 }
3327 
3328 /* Set THIS to be a duplicate of CTX.  Copy all relevant info.  */
3329 
3330 void
duplicate_from(const ipa_call_context & ctx)3331 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3332 {
3333   m_node = ctx.m_node;
3334   m_possible_truths = ctx.m_possible_truths;
3335   m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3336   class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3337   unsigned int nargs = params_summary
3338 		       ? ipa_get_param_count (params_summary) : 0;
3339 
3340   m_inline_param_summary = vNULL;
3341   /* Copy the info only if there is at least one useful entry.  */
3342   if (ctx.m_inline_param_summary.exists ())
3343     {
3344       unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3345 
3346       for (unsigned int i = 0; i < n; i++)
3347 	if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3348 	    && !ctx.m_inline_param_summary[i].useless_p ())
3349 	  {
3350             m_inline_param_summary
3351 		    = ctx.m_inline_param_summary.copy ();
3352 	    break;
3353 	  }
3354     }
3355   m_known_vals = vNULL;
3356   if (ctx.m_known_vals.exists ())
3357     {
3358       unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3359 
3360       for (unsigned int i = 0; i < n; i++)
3361 	if (ipa_is_param_used_by_indirect_call (params_summary, i)
3362 	    && ctx.m_known_vals[i])
3363 	  {
3364 	    m_known_vals = ctx.m_known_vals.copy ();
3365 	    break;
3366 	  }
3367     }
3368 
3369   m_known_contexts = vNULL;
3370   if (ctx.m_known_contexts.exists ())
3371     {
3372       unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3373 
3374       for (unsigned int i = 0; i < n; i++)
3375 	if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3376 	    && !ctx.m_known_contexts[i].useless_p ())
3377 	  {
3378 	    m_known_contexts = ctx.m_known_contexts.copy ();
3379 	    break;
3380 	  }
3381     }
3382 
3383   m_known_aggs = vNULL;
3384   if (ctx.m_known_aggs.exists ())
3385     {
3386       unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3387 
3388       for (unsigned int i = 0; i < n; i++)
3389 	if (ipa_is_param_used_by_indirect_call (params_summary, i)
3390 	    && !ctx.m_known_aggs[i].is_empty ())
3391 	  {
3392 	    m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3393 	    break;
3394 	  }
3395     }
3396 }
3397 
3398 /* Release memory used by known_vals/contexts/aggs vectors.
3399    If ALL is true release also inline_param_summary.
3400    This happens when context was previously duplicated to be stored
3401    into cache.  */
3402 
3403 void
release(bool all)3404 ipa_call_context::release (bool all)
3405 {
3406   /* See if context is initialized at first place.  */
3407   if (!m_node)
3408     return;
3409   ipa_release_agg_values (m_known_aggs, all);
3410   if (all)
3411     {
3412       m_known_vals.release ();
3413       m_known_contexts.release ();
3414       m_inline_param_summary.release ();
3415     }
3416 }
3417 
/* Return true if CTX describes the same call context as THIS.
   Two contexts are equal when they agree on the node, the truth
   clauses, and on every vector entry that corresponds to a parameter
   actually used by the summary predicates.  A missing or useless/NULL
   entry on one side compares equal to a missing or useless/NULL entry
   on the other, so vectors of different lengths can still match.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  /* Compare inline parameter summaries for parameters used by
     predicates.  */
  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  /* Entry missing/useless on this side: equal iff it is also
	     missing/useless on the other side.  */
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
	     	 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  /* Compare known scalar values for parameters used by indirect
     calls.  */
  if (m_known_vals.exists () || ctx.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_known_vals.length () || !m_known_vals[i])
	    {
	      if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
	    {
	      if (i < m_known_vals.length () && m_known_vals[i])
		return false;
	      continue;
	    }
	  /* Known values are compared by pointer identity.  */
	  if (m_known_vals[i] != ctx.m_known_vals[i])
	    return false;
	}
    }
  /* Compare polymorphic call contexts for parameters used by
     polymorphic calls.  */
  if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_known_contexts.length ()
	      || m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_known_contexts.length ()
		  && !ctx.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_contexts.length ()
	      || ctx.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_known_contexts.length ()
		  && !m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_known_contexts[i].equal_to
	     	 (ctx.m_known_contexts[i]))
	    return false;
	}
    }
  /* Compare known aggregate values for parameters used by indirect
     calls.  */
  if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
	    {
	      if (i < ctx.m_known_aggs.length ()
		  && !ctx.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_aggs.length ()
	      || ctx.m_known_aggs[i].is_empty ())
	    {
	      if (i < m_known_aggs.length ()
		  && !m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
	    return false;
	}
    }
  return true;
}
3535 
/* Estimate size and time needed to execute call in the given context.
   Additionally determine hints determined by the context.  Finally compute
   minimal size needed for the call that is independent on the call context and
   can be used for fast estimates.  Return the values in RET_SIZE,
   RET_MIN_SIZE, RET_TIME and RET_HINTS.  Any of the output pointers may be
   NULL, in which case the corresponding value is not computed.  */

void
ipa_call_context::estimate_size_and_time (int *ret_size,
					  int *ret_min_size,
					  sreal *ret_time,
					  sreal *ret_nonspecialized_time,
					  ipa_hints *ret_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s\n"
	       "   Known to be false: ", m_node->dump_name ());

      /* Dump every condition the context proves false (bit cleared in
	 m_possible_truths).  */
      for (i = predicate::not_inlined_condition;
	   i < (predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* Account size and time of all calls first; time and hints are only
     computed when the caller asked for them.  */
  if (m_node->callees || m_node->indirect_calls)
    estimate_calls_size_and_time (m_node, &size, &min_size,
				  ret_time ? &time : NULL,
				  ret_hints ? &hints : NULL, m_possible_truths,
				  m_known_vals, m_known_contexts, m_known_aggs);

  sreal nonspecialized_time = time;

  /* Entry 0 of the size/time table is unconditional (see the checking
     asserts below), so it always contributes to the minimal size.  */
  min_size += (*info->size_time_table)[0].size;
  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
        {
          bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!ret_time)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      if (nonconst)
	        time += e->time;
	    }
	  else
	    {
	      /* With inline parameter summaries available, scale the entry's
		 time by the probability that its predicate holds.  */
	      int prob = e->nonconst_predicate.probability
					       (info->conds, m_possible_truths,
					        m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      if (prob == REG_BR_PROB_BASE)
	        time += e->time;
	      else
	        time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
        }
     }
  gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
  gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (ret_hints)
    {
      if (info->loop_iterations
	  && !info->loop_iterations->evaluate (m_possible_truths))
	hints |= INLINE_HINT_loop_iterations;
      if (info->loop_stride
	  && !info->loop_stride->evaluate (m_possible_truths))
	hints |= INLINE_HINT_loop_stride;
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;
    }

  /* Sizes are accumulated in size_scale units; scale them back down.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n   size:%i time:%f nonspec time:%f\n", (int) size,
	     time.to_double (), nonspecialized_time.to_double ());
  if (ret_time)
    *ret_time = time;
  if (ret_nonspecialized_time)
    *ret_nonspecialized_time = nonspecialized_time;
  if (ret_size)
    *ret_size = size;
  if (ret_min_size)
    *ret_min_size = min_size;
  if (ret_hints)
    *ret_hints = hints;
  return;
}
3677 
3678 
3679 /* Estimate size and time needed to execute callee of EDGE assuming that
3680    parameters known to be constant at caller of EDGE are propagated.
3681    KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3682    and types for parameters.  */
3683 
3684 void
estimate_ipcp_clone_size_and_time(struct cgraph_node * node,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,int * ret_size,sreal * ret_time,sreal * ret_nonspec_time,ipa_hints * hints)3685 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3686 				   vec<tree> known_vals,
3687 				   vec<ipa_polymorphic_call_context>
3688 				   known_contexts,
3689 				   vec<ipa_agg_value_set> known_aggs,
3690 				   int *ret_size, sreal *ret_time,
3691 				   sreal *ret_nonspec_time,
3692 				   ipa_hints *hints)
3693 {
3694   clause_t clause, nonspec_clause;
3695 
3696   /* TODO: Also pass known value ranges.  */
3697   evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3698 				      known_aggs, &clause, &nonspec_clause);
3699   ipa_call_context ctx (node, clause, nonspec_clause,
3700 		        known_vals, known_contexts,
3701 		        known_aggs, vNULL);
3702   ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3703 			      ret_nonspec_time, hints);
3704 }
3705 
3706 /* Return stack frame offset where frame of NODE is supposed to start inside
3707    of the function it is inlined to.
3708    Return 0 for functions that are not inlined.  */
3709 
3710 HOST_WIDE_INT
ipa_get_stack_frame_offset(struct cgraph_node * node)3711 ipa_get_stack_frame_offset (struct cgraph_node *node)
3712 {
3713   HOST_WIDE_INT offset = 0;
3714   if (!node->inlined_to)
3715     return 0;
3716   node = node->callers->caller;
3717   while (true)
3718     {
3719       offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3720       if (!node->inlined_to)
3721 	return offset;
3722       node = node->callers->caller;
3723     }
3724 }
3725 
3726 
3727 /* Update summary information of inline clones after inlining.
3728    Compute peak stack usage.  */
3729 
3730 static void
inline_update_callee_summaries(struct cgraph_node * node,int depth)3731 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3732 {
3733   struct cgraph_edge *e;
3734 
3735   ipa_propagate_frequency (node);
3736   for (e = node->callees; e; e = e->next_callee)
3737     {
3738       if (!e->inline_failed)
3739 	inline_update_callee_summaries (e->callee, depth);
3740       else
3741 	ipa_call_summaries->get (e)->loop_depth += depth;
3742     }
3743   for (e = node->indirect_calls; e; e = e->next_callee)
3744     ipa_call_summaries->get (e)->loop_depth += depth;
3745 }
3746 
/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
   When function A is inlined in B and A calls C with parameter that
   changes with probability PROB1 and C is known to be passthrough
   of argument if B that change with probability PROB2, the probability
   of change is now PROB1*PROB2.  */

static void
remap_edge_change_prob (struct cgraph_edge *inlined_edge,
			struct cgraph_edge *edge)
{
  if (ipa_node_params_sum)
    {
      int i;
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      if (!args)
	return;
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      class ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      /* Nothing to remap if EDGE carries no per-parameter data.  */
      if (es->param.length () == 0)
	return;

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  /* Only pass-through and ancestor jump functions forward a formal
	     parameter of the caller of INLINED_EDGE.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      /* ID is the formal parameter this argument is fed from.  */
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  int prob = combine_probabilities (prob1, prob2);

		  /* Do not let two nonzero probabilities combine to zero;
		     round up to the smallest representable probability.  */
		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;
		}
	    }
	}
    }
}
3794 
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<int> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  /* NEXT is remembered up front because edge_set_predicate may remove E
     from the list (e.g. when the predicate becomes false).  */
  for (e = node->callees; e; e = next)
    {
      predicate p;
      next = e->next_callee;

      if (e->inline_failed)
	{
          class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_change_prob (inlined_edge, e);

	  /* An edge without its own predicate inherits the top-level one;
	     otherwise remap its predicate into the combined function.  */
	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* The edge was already inlined; recurse into the inline clone.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
		              params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  /* Indirect calls are never inlined, so no recursion is needed here.  */
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_change_prob (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
			          possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
3861 
3862 /* Same as remap_predicate, but set result into hint *HINT.  */
3863 
3864 static void
remap_hint_predicate(class ipa_fn_summary * info,class ipa_node_params * params_summary,class ipa_fn_summary * callee_info,predicate ** hint,vec<int> operand_map,vec<int> offset_map,clause_t possible_truths,predicate * toplev_predicate)3865 remap_hint_predicate (class ipa_fn_summary *info,
3866 		      class ipa_node_params *params_summary,
3867 		      class ipa_fn_summary *callee_info,
3868 		      predicate **hint,
3869 		      vec<int> operand_map,
3870 		      vec<int> offset_map,
3871 		      clause_t possible_truths,
3872 		      predicate *toplev_predicate)
3873 {
3874   predicate p;
3875 
3876   if (!*hint)
3877     return;
3878   p = (*hint)->remap_after_inlining
3879 			 (info, params_summary, callee_info,
3880 			  operand_map, offset_map,
3881 			  possible_truths, *toplev_predicate);
3882   if (p != false && p != true)
3883     {
3884       if (!*hint)
3885 	set_hint_predicate (hint, p);
3886       else
3887 	**hint &= p;
3888     }
3889 }
3890 
/* We inlined EDGE.  Update summary of the function we inlined into.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* TO is the toplevel function the callee's body ultimately lands in.  */
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<int, 8> offset_map;
  int i;
  predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  class ipa_node_params *params_summary = (ipa_node_params_sum
		 			   ? IPA_NODE_REF (to) : NULL);

  /* EDGE's own predicate guards everything merged from the callee.  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  if (callee_info->conds)
    {
      /* Only CLAUSE is used below; the known values and aggregates are
	 produced as a side effect and discarded.  */
      auto_vec<tree, 32> known_vals;
      auto_vec<ipa_agg_value_set, 32> known_aggs;
      evaluate_properties_for_edge (edge, true, &clause, NULL,
				    &known_vals, NULL, &known_aggs);
    }
  if (ipa_node_params_sum && callee_info->conds)
    {
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      /* Build maps from the callee's parameter indices to the caller's:
	 OPERAND_MAP[i] is the caller parameter feeding argument I (-1 when
	 it is not a simple NOP pass-through); OFFSET_MAP[i] is the ancestor
	 offset, or -1 when aggregate contents are not preserved.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}
    }
  /* Merge the callee's size/time entries into INFO, remapping their
     predicates and scaling time by the edge execution frequency.  */
  sreal freq =  edge->sreal_frequency ();
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, clause,
				      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  if (prob != REG_BR_PROB_BASE)
	    add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  remap_edge_summaries (edge, edge->callee, info, params_summary,
		 	callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);

  /* Propagate peak stack usage of the callee into TO.  */
  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);
  if (info->call_size_time_table)
    {
      int edge_size = 0;
      sreal edge_time = 0;

      estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL,
		      		   vNULL, vNULL, 0);
      /* Unaccount size and time of the optimized out call.  */
      info->account_size_time (-edge_size, -edge_time,
	 		       es->predicate ? *es->predicate : true,
	 		       es->predicate ? *es->predicate : true,
			       true);
      /* Account new calls.  */
      summarize_calls_size_and_time (edge->callee, info);
    }

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}
4032 
4033 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4034    overall size and time.  Recompute it.
4035    If RESET is true also recompute call_time_size_table.  */
4036 
4037 void
ipa_update_overall_fn_summary(struct cgraph_node * node,bool reset)4038 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4039 {
4040   class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4041   class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4042   size_time_entry *e;
4043   int i;
4044 
4045   size_info->size = 0;
4046   info->time = 0;
4047   for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4048     {
4049       size_info->size += e->size;
4050       info->time += e->time;
4051     }
4052   info->min_size = (*info->size_time_table)[0].size;
4053   if (reset)
4054     vec_free (info->call_size_time_table);
4055   if (node->callees || node->indirect_calls)
4056     estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4057 				  &info->time, NULL,
4058 				  ~(clause_t) (1 << predicate::false_condition),
4059 				  vNULL, vNULL, vNULL);
4060   size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4061   info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4062 }
4063 
4064 
4065 /* This function performs intraprocedural analysis in NODE that is required to
4066    inline indirect calls.  */
4067 
4068 static void
inline_indirect_intraprocedural_analysis(struct cgraph_node * node)4069 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4070 {
4071   ipa_analyze_node (node);
4072   if (dump_file && (dump_flags & TDF_DETAILS))
4073     {
4074       ipa_print_node_params (dump_file, node);
4075       ipa_print_node_jump_functions (dump_file, node);
4076     }
4077 }
4078 
4079 
4080 /* Note function body size.  */
4081 
4082 void
inline_analyze_function(struct cgraph_node * node)4083 inline_analyze_function (struct cgraph_node *node)
4084 {
4085   push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4086 
4087   if (dump_file)
4088     fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4089   if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4090     inline_indirect_intraprocedural_analysis (node);
4091   compute_fn_summary (node, false);
4092   if (!optimize)
4093     {
4094       struct cgraph_edge *e;
4095       for (e = node->callees; e; e = e->next_callee)
4096 	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4097       for (e = node->indirect_calls; e; e = e->next_callee)
4098 	e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4099     }
4100 
4101   pop_cfun ();
4102 }
4103 
4104 
/* Called when new function is inserted to callgraph late.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  /* Compute the summary for NODE from scratch; the freshly allocated
     summary passed as the unnamed argument is filled in by the call.  */
  inline_analyze_function (node);
}
4112 
4113 /* Note function body size.  */
4114 
4115 static void
ipa_fn_summary_generate(void)4116 ipa_fn_summary_generate (void)
4117 {
4118   struct cgraph_node *node;
4119 
4120   FOR_EACH_DEFINED_FUNCTION (node)
4121     if (DECL_STRUCT_FUNCTION (node->decl))
4122       node->versionable = tree_versionable_function_p (node->decl);
4123 
4124   ipa_fn_summary_alloc ();
4125 
4126   ipa_fn_summaries->enable_insertion_hook ();
4127 
4128   ipa_register_cgraph_hooks ();
4129 
4130   FOR_EACH_DEFINED_FUNCTION (node)
4131     if (!node->alias
4132 	&& (flag_generate_lto || flag_generate_offload|| flag_wpa
4133 	    || opt_for_fn (node->decl, optimize)))
4134       inline_analyze_function (node);
4135 }
4136 
4137 
/* Read inline summary of edge E from IB (the counterpart of
   write_ipa_call_summary).  When PREVAILS is false the edge belongs to a
   non-prevailing node; the data is still consumed from the stream but
   thrown away.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  /* ES stays NULL for non-prevailing nodes; every read below still has to
     happen to keep the stream position correct.  */
  class ipa_call_summary *es = prevails
				? ipa_call_summaries->get_create (e) : NULL;
  predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  /* Per-parameter change probabilities are only kept when the call might
     actually be made within this translation unit.  */
  if (length && es && e->possibly_call_in_translation_unit_p ())
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
  else
    {
      /* Skip over the streamed probabilities.  */
      for (i = 0; i < length; i++)
	streamer_read_uhwi (ib);
    }
}
4182 
4183 
/* Stream in inline summaries from the section.  The layout read here must
   mirror what ipa_fn_summary_write produces.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  /* One record per streamed function.  */
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* For non-prevailing nodes the summaries stay NULL and the values
	 below are read only to advance the stream.  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  /* On the side of streaming out, there is still one bit
	     streamed out between inlinable and fp_expressions bits,
	     which was used for cilk+ before but now always false.
	     To remove the bit packing need to bump LTO minor version,
	     so unpack a dummy bit here to keep consistent instead.  */
	  bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Read the vector of conditions referenced by the predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
        vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  /* Read the chain of operations applied to the parameter before
	     the condition is tested.  */
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
		    (params_summary, c.operand_num, true);
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  /* INDEX tells which operand position the parameter takes;
		     the remaining operand(s) are streamed constants.  */
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
	        c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      /* Read the size/time table.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table);
      if (info && count2)
        vec_safe_reserve_exact (info->size_time_table, count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table->quick_push (e);
	}

      /* Read the hint predicates.  */
      p.stream_in (&ib);
      if (info)
        set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      if (info)
        set_hint_predicate (&info->loop_stride, p);
      /* Finally, the summaries of all outgoing edges.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
4350 
4351 
4352 /* Read inline summary.  Jump functions are shared among ipa-cp
4353    and inliner, so when ipa-cp is active, we don't need to write them
4354    twice.  */
4355 
4356 static void
ipa_fn_summary_read(void)4357 ipa_fn_summary_read (void)
4358 {
4359   struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4360   struct lto_file_decl_data *file_data;
4361   unsigned int j = 0;
4362 
4363   ipa_prop_read_jump_functions ();
4364   ipa_fn_summary_alloc ();
4365 
4366   while ((file_data = file_data_vec[j++]))
4367     {
4368       size_t len;
4369       const char *data
4370 	= lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4371 					&len);
4372       if (data)
4373 	inline_read_section (file_data, data, len);
4374       else
4375 	/* Fatal error here.  We do not want to support compiling ltrans units
4376 	   with different version of compiler or different flags than the WPA
4377 	   unit, so this should never happen.  */
4378 	fatal_error (input_location,
4379 		     "ipa inline summary is missing in input file");
4380     }
4381   ipa_register_cgraph_hooks ();
4382 
4383   gcc_assert (ipa_fn_summaries);
4384   ipa_fn_summaries->enable_insertion_hook ();
4385 }
4386 
4387 
/* Write inline summary for edge E to OB.  The order of fields must stay in
   sync with read_ipa_call_summary.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);

  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
  streamer_write_bitpack (&bp);

  /* A missing predicate is streamed as a bare 0 — presumably decoded as
     "no predicate" by predicate::stream_in; see read_ipa_call_summary.  */
  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  /* Per-parameter change probabilities, prefixed by their count.  */
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}
4412 
4413 
4414 /* Write inline summary for node in SET.
4415    Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4416    active, we don't need to write them twice.  */
4417 
4418 static void
ipa_fn_summary_write(void)4419 ipa_fn_summary_write (void)
4420 {
4421   struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4422   lto_symtab_encoder_iterator lsei;
4423   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4424   unsigned int count = 0;
4425 
4426   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4427        lsei_next_function_in_partition (&lsei))
4428     {
4429       cgraph_node *cnode = lsei_cgraph_node (lsei);
4430       if (cnode->definition && !cnode->alias)
4431 	count++;
4432     }
4433   streamer_write_uhwi (ob, count);
4434 
4435   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4436        lsei_next_function_in_partition (&lsei))
4437     {
4438       cgraph_node *cnode = lsei_cgraph_node (lsei);
4439       if (cnode->definition && !cnode->alias)
4440 	{
4441 	  class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4442 	  class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4443 	  struct bitpack_d bp;
4444 	  struct cgraph_edge *edge;
4445 	  int i;
4446 	  size_time_entry *e;
4447 	  struct condition *c;
4448 
4449 	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4450 	  streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4451 	  streamer_write_hwi (ob, size_info->self_size);
4452 	  info->time.stream_out (ob);
4453 	  bp = bitpack_create (ob->main_stream);
4454 	  bp_pack_value (&bp, info->inlinable, 1);
4455 	  bp_pack_value (&bp, false, 1);
4456 	  bp_pack_value (&bp, info->fp_expressions, 1);
4457 	  streamer_write_bitpack (&bp);
4458 	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
4459 	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4460 	    {
4461 	      int j;
4462 	      struct expr_eval_op *op;
4463 
4464 	      streamer_write_uhwi (ob, c->operand_num);
4465 	      streamer_write_uhwi (ob, c->code);
4466 	      stream_write_tree (ob, c->type, true);
4467 	      stream_write_tree (ob, c->val, true);
4468 	      bp = bitpack_create (ob->main_stream);
4469 	      bp_pack_value (&bp, c->agg_contents, 1);
4470 	      bp_pack_value (&bp, c->by_ref, 1);
4471 	      streamer_write_bitpack (&bp);
4472 	      if (c->agg_contents)
4473 		streamer_write_uhwi (ob, c->offset);
4474 	      streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4475 	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4476 		{
4477 		  streamer_write_uhwi (ob, op->code);
4478 		  stream_write_tree (ob, op->type, true);
4479 		  if (op->val[0])
4480 		    {
4481 		      bp = bitpack_create (ob->main_stream);
4482 		      bp_pack_value (&bp, op->index, 2);
4483 		      streamer_write_bitpack (&bp);
4484 		      stream_write_tree (ob, op->val[0], true);
4485 		      if (op->val[1])
4486 			stream_write_tree (ob, op->val[1], true);
4487 		    }
4488 		}
4489 	    }
4490 	  streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4491 	  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4492 	    {
4493 	      streamer_write_uhwi (ob, e->size);
4494 	      e->time.stream_out (ob);
4495 	      e->exec_predicate.stream_out (ob);
4496 	      e->nonconst_predicate.stream_out (ob);
4497 	    }
4498 	  if (info->loop_iterations)
4499 	    info->loop_iterations->stream_out (ob);
4500  	  else
4501 	    streamer_write_uhwi (ob, 0);
4502 	  if (info->loop_stride)
4503 	    info->loop_stride->stream_out (ob);
4504  	  else
4505 	    streamer_write_uhwi (ob, 0);
4506 	  for (edge = cnode->callees; edge; edge = edge->next_callee)
4507 	    write_ipa_call_summary (ob, edge);
4508 	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4509 	    write_ipa_call_summary (ob, edge);
4510 	}
4511     }
4512   streamer_write_char_stream (ob->main_stream, 0);
4513   produce_asm (ob, NULL);
4514   destroy_output_block (ob);
4515 
4516   ipa_prop_write_jump_functions ();
4517 }
4518 
4519 
4520 /* Release function summary.  */
4521 
4522 void
ipa_free_fn_summary(void)4523 ipa_free_fn_summary (void)
4524 {
4525   if (!ipa_call_summaries)
4526     return;
4527   ggc_delete (ipa_fn_summaries);
4528   ipa_fn_summaries = NULL;
4529   delete ipa_call_summaries;
4530   ipa_call_summaries = NULL;
4531   edge_predicate_pool.release ();
4532   /* During IPA this is one of largest datastructures to release.  */
4533   if (flag_wpa)
4534     ggc_trim ();
4535 }
4536 
4537 /* Release function summary.  */
4538 
4539 void
ipa_free_size_summary(void)4540 ipa_free_size_summary (void)
4541 {
4542   if (!ipa_size_summaries)
4543     return;
4544   delete ipa_size_summaries;
4545   ipa_size_summaries = NULL;
4546 }
4547 
namespace {

/* Descriptor for the per-function "local-fnsummary" GIMPLE pass.  */
const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass computing the function summary for the function being
   compiled.  Cloneable so it can appear multiple times in the pass
   pipeline.  */

class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      /* Delegate all work; no TODO flags are returned beyond what the
	 computation itself reports.  */
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
4580 
/* Factory for the local function summary pass; invoked by the pass
   manager.  Caller takes ownership of the returned pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
4586 
4587 
/* Free inline summary.  */

namespace {

/* Descriptor for the simple IPA "free-fnsummary" pass.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Simple IPA pass that releases the function summaries (and, outside of
   WPA, the size summaries) once they are no longer needed.  */

class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Record the single boolean pass parameter from passes.def.
     NOTE(review): small_p is stored here but not consulted by execute
     below — confirm whether a consumer exists elsewhere.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return true; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      /* Size summaries are still needed during WPA; free them only in
	 non-WPA compilations.  */
      if (!flag_wpa)
	ipa_free_size_summary ();
      return 0;
    }

private:
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
4634 
/* Factory for the free-fnsummary pass; invoked by the pass manager.
   Caller takes ownership of the returned pass.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
4640 
namespace {

/* Descriptor for the full IPA "fnsummary" pass.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* IPA pass wiring summary generation and LTO streaming together.  All
   work happens in the summary hooks passed to the base-class constructor;
   execute itself is a no-op.  */

class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
4678 
/* Factory for the IPA function summary pass; invoked by the pass manager.
   Caller takes ownership of the returned pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
4684 
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  /* ipa_free_fn_summary is a no-op when the summaries are already gone,
     so this is safe to call unconditionally.  */
  ipa_free_fn_summary ();
}
4693